#rmarkdown tips
# https://bookdown.org/yihui/rmarkdown/basics.html
### for code folding, put this under html options above and set echo = T below:
# output:
# html_document:
# code_folding: hide
# make sections with R code
# https://stackoverflow.com/questions/36674824/use-loop-to-generate-section-of-text-in-rmarkdown
# Global knitr chunk options for the whole report.
# `error = FALSE` stops knitting at the first error; switching it to TRUE keeps
# knitting past errors, which is dangerous and meant for testing only.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  results = 'hold',
  fig.width = 7,
  fig.height = 7,
  error = FALSE
)

# To anchor all paths at the project root (use this instead of setwd() in Rmd):
# knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
# Left commented out; the current working directory is used as-is.

# Start the wall-clock timer for the run.
timestart <- proc.time()

# Per-worker memory ceiling for globals exported by the `future` framework
# (used for marker finding; some Seurat functions also rely on it under the
# hood). The pipeline may fail if this is not set high enough.
# Smaller alternatives, kept for reference:
# options(future.globals.maxSize = 850 * 1024^2)  # 850 MB
# options(future.globals.maxSize = 1000 * 1024^2) # ~1 GB
# options(future.globals.maxSize = 8000 * 1024^2) # 8 GB
options(future.globals.maxSize = 15000 * 1024^2) # 15 GB

# Fix the RNG seed.
set.seed(2022)
## for testing and / or manually setting things only;
## keep commented otherwise
# bindingIsLocked("params", env = .GlobalEnv)
# unlockBinding("params", env = .GlobalEnv)
#
# projdir = '/Users/alexanderferrena/Dropbox (EinsteinMed)/Kevyn_SDAP_Test/'
#
# params <- list(
# datadir = paste0(projdir, 'NewReplicate_GFPpos_GFPneg/datadir/'),
# outdir = paste0(projdir, 'outs/DEVTEST/' ),
#
# use_labeltransfer = F,
# # refdatapath = paste0(projdir, 'Reference_Seurat_Generation/Fuwai_E105_Wn1Cre_Hearts/CNCCsE105_Dub_namedClusters.rds'),
# # m_reference = paste0(projdir, 'Reference_Seurat_Generation/Fuwai_E105_Wn1Cre_Hearts/Markers.rds'),
#
# sample_metadata = paste0(projdir, '/NewReplicate_GFPpos_GFPneg/sample_metadata.csv'),
# comps = paste0(projdir, '/NewReplicate_GFPpos_GFPneg/comps.csv'),
#
# min_num_UMI = 500,
# min_num_Feature = 200,
# max_perc_mito = 25,
# max_perc_hemoglobin = 25,
# autofilter_complexity = F,
# autofilter_mito = FALSE,
# autofilter_nUMI = F,
# autofilter_medianabsolutedev_threshold = 3,
# autofilter_loess_negative_residual_threshold = -5,
#
# doubletFinder = T,
#
#
# pcs_indi = 30,
# res_indi = 0.5,
# pcs_int = 30,
# res_int = 1,
# RISC_louvain_neighbors=10,
#
# risc_reference = NULL,
# crossconditionDE_padj_thres = NULL,
# crossconditionDE_lfc_thres = NULL,
# pathway_padj_thres = 0.1,
# species = 'Mus musculus',
# workernum = 4,
# input_seurat_obj = F,
# force_redo = F
# )
#set verbosity --> deprecated, keep false
# Verbosity flag --> deprecated, keep FALSE.
verbose <- FALSE

## key params, setting paths
# The raw parameters are validated BEFORE the trailing slash is appended:
# paste0(NULL, '/') returns "/", so testing the pasted value with is.null()
# can never detect a missing parameter.
if (is.null(params$datadir)) {
  stop('Please set datadir, path to cellranger results')
}
datadir <- paste0(params$datadir, '/')

# Whether inputs are saved Seurat objects (.rds); defaults to FALSE when the
# parameter is not supplied.
input_seurat_obj <- params$input_seurat_obj
if (is.null(input_seurat_obj)) {
  input_seurat_obj <- FALSE
}

# Top-level output directory.
if (is.null(params$outdir)) {
  stop('Please set outdir, output folder')
}
outdir <- paste0(params$outdir, '/')
dir.create(outdir, recursive = TRUE)

# Output directory for the per-sample analysis.
outdir_indi <- paste0(outdir, '/individualsample_analysis')
dir.create(outdir_indi)

# Output directory for the multi-sample integration results.
outdir_int <- paste0(outdir, '/multisample_integration/')
dir.create(outdir_int)
#set path to reference data for label transfer.
# Label transfer switch. When enabled, both the reference Seurat object path
# and the reference marker path are mandatory; when disabled both are set to NA.
use_labeltransfer <- params$use_labeltransfer
if (is.null(use_labeltransfer)) {
  stop('Please set use_labeltransfer to T/F; if T, please also provide paths refdatapath and m_reference')
}

if (!(use_labeltransfer == TRUE)) {
  refdatapath <- NA
  m_reference <- NA
  # SeuratLabelTransfer.normalization.method <- NA
} else {
  # reference dataset
  refdatapath <- params$refdatapath
  if (is.null(refdatapath)) {
    stop('Please set refdatapath, path to reference scRNAseq Seurat object .rds file with "Celltype" in meta.data')
  }

  # reference markers
  m_reference <- params$m_reference
  # SeuratLabelTransfer.normalization.method <- params$SeuratLabelTransfer.normalization.method
  if (is.null(m_reference)) {
    stop('Please set m_reference, path to FindAllMarkers result from reference celltypes, saved as .rds file')
  }
}

# Differential expression mode: pseudobulk (TRUE) or not (FALSE); required.
Pseudobulk_mode <- params$Pseudobulk_mode
if (is.null(Pseudobulk_mode)) {
  stop('Please set "Pseudobulk_mode" as either TRUE or FALSE')
}
## analysis parameters: dimreduction and clustering hyperparameters
# just use defaults in params since its easier, except for pwaycats
# Pathway categories are fixed here rather than taken from params.
pwaycats <- c(
  "HALLMARK",
  "GO_BP", "GO_MF", "GO_CC",
  "CP_REACTOME", "CP_KEGG"
)

# Reference sample for RISC integration (may be NULL).
risc_reference <- params$risc_reference

# Hard QC cutoffs.
min_num_UMI         <- params$min_num_UMI
min_num_Feature     <- params$min_num_Feature
max_perc_mito       <- params$max_perc_mito
max_perc_hemoglobin <- params$max_perc_hemoglobin

# Automated (outlier-based) filter switches and their thresholds.
autofilter_complexity <- params$autofilter_complexity
autofilter_mito       <- params$autofilter_mito
autofilter_nUMI       <- params$autofilter_nUMI
autofilter_medianabsolutedev_threshold <-
  params$autofilter_medianabsolutedev_threshold
autofilter_loess_negative_residual_threshold <-
  params$autofilter_loess_negative_residual_threshold

# Doublet detection switch.
doubletFinder <- params$doubletFinder
# #check package versions and add warnings
# if( (packageVersion('DoubletFinder') == "2.0.3") & (packageVersion('Seurat') >= "5.0.0") ){
# warning('DoubletFinder v2.0.3 (latest update as of Nov 14 2023) is not compatible with Seurat > v5.0.0.\nTurning off DoubletFinder.\nTo use DoubletFinder, try downgrading to Seurat v4:',
# '\n',
# "https://satijalab.org/seurat/articles/install_v5#install-previous-versions-of-seurat"
#
# )
#
# doubletFinder <- F
# }
# Warn up front when the installed DoubletFinder / Seurat versions are the
# combination that requires coercing the Seurat v5 object.
if (
  (packageVersion('DoubletFinder') == "2.0.3") &
    (packageVersion('Seurat') >= "5.0.0")
) {
  warning('Will attempt to coerce Seurat v5 object to work with DoubletFinder v2.0.3; this is unstable and does not always work! If any errors arise, set doubletFinder to FALSE in pipeline runner')
}

# Dimensionality / clustering settings for single samples and for integration.
pcs_indi <- params$pcs_indi
res_indi <- params$res_indi
pcs_int <- params$pcs_int
res_int <- params$res_int
RISC_louvain_neighbors <- params$RISC_louvain_neighbors

# Cross-condition DE thresholds. A NULL parameter means "use the default for
# the chosen DE mode": lenient for pseudobulk, stricter otherwise.
crossconditionDE_padj_thres <- params$crossconditionDE_padj_thres
crossconditionDE_lfc_thres <- params$crossconditionDE_lfc_thres

if (Pseudobulk_mode == TRUE) {
  if (is.null(crossconditionDE_padj_thres)) {
    crossconditionDE_padj_thres <- 0.1
  }
  if (is.null(crossconditionDE_lfc_thres)) {
    crossconditionDE_lfc_thres <- 0
  }
}

if (Pseudobulk_mode == FALSE) {
  if (is.null(crossconditionDE_padj_thres)) {
    crossconditionDE_padj_thres <- 0.05
  }
  if (is.null(crossconditionDE_lfc_thres)) {
    crossconditionDE_lfc_thres <- 0.25
  }
}

# Pathway significance cutoff and species.
pathway_padj_thres <- params$pathway_padj_thres
species <- params$species

# Number of parallel workers.
workernum <- params$workernum

# Redo / overwrite switch.
# NOTE(review): hard-coded to TRUE; params$force_redo is not read here.
# force_redo <- params$force_redo
force_redo <- TRUE
# ## save the parameter choices
# paramsave <- lapply(1:length(params), function(i){
# var <- names(params)[i]
#
# get(var)
# })
#
# names(paramsave) <- names(params)
#
#
# paramsave <- t(data.frame(paramsave))
#
# paramsave <- cbind(rownames(paramsave), paramsave)
# rownames(paramsave) <- NULL
## defining DE comparisons and sample conditions with metadata
#do this below saving params, since we overwrite these names
#metadata
# Sample metadata: path to a csv with (at least) a Sample and a Condition
# column; an optional Code column gives the per-sample label.
sample_metadata <- params$sample_metadata
if (is.null(sample_metadata)) {
  stop('Please set sample_metadata, path to csv file with Sample column and Condition column')
}
sample_metadata <- read.csv(sample_metadata)

# Fail early with a readable message instead of an obscure data.frame
# assignment error when a required column is absent.
missing_md_cols <- setdiff(c('Sample', 'Condition'), colnames(sample_metadata))
if (length(missing_md_cols) > 0) {
  stop('sample_metadata is missing required column(s): ',
       paste(missing_md_cols, collapse = ', '))
}

# Keep conditions in the order in which they first appear in the file.
sample_metadata$Condition <- factor(sample_metadata$Condition,
                                    levels = unique(sample_metadata$Condition))

# Code is optional; default to Condition_Sample.
if (!('Code' %in% colnames(sample_metadata))) {
  sample_metadata$Code <- paste0(sample_metadata$Condition, '_', sample_metadata$Sample)
}

# Comparisons: path to a csv with c1 and c2 columns. When it is not given, the
# first two condition levels are used as the single comparison. This needs at
# least two conditions; the previous test (`> 2`) rejected the case of exactly
# two conditions, which is the only case where the guess is unambiguous.
comps <- params$comps
if (!is.null(comps)) {
  comps <- read.csv(comps)
} else if (nlevels(sample_metadata$Condition) >= 2) {
  warning('"comps" data.frame not provided, will try to guess from sample_metadata')
  comps <- data.frame(c1 = levels(sample_metadata$Condition)[1],
                      c2 = levels(sample_metadata$Condition)[2])
} else {
  stop('Please set comps, path to csv file c1 and c2 column defining comparisons of conditions to use')
}
#save params as a data.frame
# Collect the effective parameter values (after defaults were applied) so they
# can be shown in the report and written next to the results.
pl <- list(datadir = datadir,
           outdir = outdir,
           sample_metadata = params$sample_metadata,
           comps = params$comps,
           use_labeltransfer = use_labeltransfer,
           refdatapath = refdatapath,
           m_reference = m_reference,
           # SeuratLabelTransfer.normalization.method=SeuratLabelTransfer.normalization.method,
           min_num_UMI = min_num_UMI,
           min_num_Feature = min_num_Feature,
           max_perc_mito = max_perc_mito,
           max_perc_hemoglobin = max_perc_hemoglobin,
           autofilter_mito = autofilter_mito,
           autofilter_nUMI = autofilter_nUMI,
           autofilter_complexity = autofilter_complexity,
           autofilter_medianabsolutedev_threshold = autofilter_medianabsolutedev_threshold,
           autofilter_loess_negative_residual_threshold = autofilter_loess_negative_residual_threshold,
           doubletFinder = doubletFinder,
           # plain if/else: the condition is a scalar, and a NULL reference
           # is reported as 'auto'
           risc_reference = if (is.null(risc_reference)) 'auto' else risc_reference,
           pcs_indi = pcs_indi,
           res_indi = res_indi,
           pcs_int = pcs_int,
           res_int = res_int,
           RISC_louvain_neighbors = RISC_louvain_neighbors,
           Pseudobulk_mode = Pseudobulk_mode,
           crossconditionDE_padj_thres = crossconditionDE_padj_thres,
           crossconditionDE_lfc_thres = crossconditionDE_lfc_thres,
           pathway_padj_thres = pathway_padj_thres,
           species = species,
           workernum = workernum,
           input_seurat_obj = input_seurat_obj
)

# Build exactly one value per parameter. unlist() drops NULL entries (for
# example `comps` when no comparison file was given) and expands entries of
# length > 1, so its result can differ in length from names(pl) and make
# data.frame() fail. NULL is reported as NA; longer values are joined by ';'.
pl_values <- vapply(
  pl,
  function(x) {
    if (is.null(x)) NA_character_ else paste(as.character(x), collapse = ';')
  },
  character(1)
)

pldf <- data.frame(parameter = names(pl),
                   value = pl_values)

# Write the parameter table to the output folder.
write.csv(pldf, paste0(outdir, '/pipeline_parameters.csv'), quote = F, row.names = F)
This report summarizes analysis of single-cell RNA-sequencing (scRNA-seq) data including single sample analysis, label transfer from a cell-type annotated reference (optional), integration with batch correction, differential expression with support for pseudobulk and multi-condition comparisons, and pathway analysis. All plots, tables, markers, Seurat objects, RISC object, DE results, and pathway analysis results are stored in the output folder.
The analysis pipelines in this report were developed by the lab of Dr. Deyou Zheng in the Department of Genetics and Department of Neuroscience at Albert Einstein College of Medicine. The pipeline was assembled primarily by PhD candidate Alexander Ferrena with additional input from all lab members.
Below is a short methods-section style description of the pipeline. More detailed information can be found in each sub-section. If required, software versions of all R packages can be found at the bottom of the document in the SessionInfo section. The version of Cellranger can be found in the web_summary.html files provided with the data release. Please read carefully and adjust accordingly before using for manuscripts or applications.
# Methods-style summary printed into the report.
# NOTE(review): the numbers quoted in the text (500 UMIs, 200 genes, 25%,
# 30 PCs, resolution 0.5, MAD 3) are literal text and are not filled in from
# the run parameters - adjust by hand if non-default values were used.
text1 <-
'
Sequencing data from the 10x Chromium samples were first analyzed with Cellranger to generate cell level gene expression data [(10X Genomics)](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger). Cells with fewer than 500 Unique Molecular Identifiers (UMIs) detected, fewer than 200 unique genes detected, more than 25% mitochondrial content, or more than 25% hemoglobin gene expression were filtered out (unless otherwise specified). An automated filtering approach to detect outliers of these variables based on median absolute deviation above 3 was also applied. Furthermore, complexity analysis based on the expected number of genes given number of UMIs per cell was also used to filter out poor quality cells. Samples were analyzed using the Seurat workflow [(Hao et al 2021)](https://doi.org/10.1016/j.cell.2021.04.048). Each sample was normalized using the SingleCellTransform pipeline [(Hafemeister & Satija 2019)](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1) with method set to "glmGamPoi" [(Ahlmann-Eltze & Huber 2020)](https://academic.oup.com/bioinformatics/article/36/24/5701/6028985). 30 principal components were used for graph construction and resolution was set to 0.5 for Louvain clustering (unless otherwise specified). Marker analysis was performed using the Wilcoxon Rank Sum Test as implemented in the Seurat FindAllMarkers() function with "only.pos" set to True. To identify cell types present in each sample, the Seurat anchor-based Label Transfer method was used, based on a single-cell RNA-seq dataset from similar tissue (unless otherwise specified).
'

# The integration paragraph differs between DE modes only in the sentences on
# the compositional test and the DE test; the shared opening and closing text
# is written once (this also fixes the "proprotional" typo in one place).
text2_intro <- 'Samples were integrated using the RISC package [(Liu, Zheng et al Nat Biotech 2021)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8456427/). 30 PCs were used for integration as well as for post-clustering analysis including Louvain clustering, unless otherwise specified. Compositional analysis to compare proportional abundance of clusters between conditions was performed via the '
text2_outro <- ' Pathway analysis on the differentially expressed genes was performed using Gene Set Enrichment Analysis (GSEA) as implemented in the FGSEA package [(Subramanian et al 2005)](https://www.pnas.org/doi/10.1073/pnas.0506580102), [(Korotkevich et al 2021)](https://www.biorxiv.org/content/10.1101/060012v3). Pathways were downloaded from the Molecular Signatures Database using the R package msigdbr [(Liberzon et al 2015)](https://academic.oup.com/bioinformatics/article/27/12/1739/257711), [(Dolgalev 2022)](https://github.com/igordot/msigdbr).
'

# Two independent tests (not if/else) are kept on purpose so that a value that
# is neither TRUE nor FALSE still leaves text2 undefined, as before.
if (Pseudobulk_mode == TRUE) {
  text2 <- paste0(
    text2_intro,
    'Propeller test as implemented in the speckle package [(Phipson et al 2022)](https://academic.oup.com/bioinformatics/article/38/20/4720/6675456). Differential expression analysis across conditions followed a pseudobulking approach based on the EdgeR-Likelihood Ratio Test (LRT) method for each cluster [(Robinson et al 2010)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/).',
    text2_outro
  )
}
if (Pseudobulk_mode == FALSE) {
  text2 <- paste0(
    text2_intro,
    '2-sample Z test as implemented in the R prop.test() function. Differential expression analysis across conditions used the Wilcoxon test as implemented in the Seurat FindMarkers() function.',
    text2_outro
  )
}

# Print both paragraphs, separated by a blank line.
alltext <- paste0(text1, '\n', text2)
cat(alltext)
Sequencing data from the 10x Chromium samples were first analyzed with Cellranger to generate cell level gene expression data (10X Genomics). Cells with fewer than 500 Unique Molecular Identifiers (UMIs) detected, fewer than 200 unique genes detected, more than 25% mitochondrial content, or more than 25% hemoglobin gene expression were filtered out (unless otherwise specified). An automated filtering approach to detect outliers of these variables based on median absolute deviation above 3 was also applied. Furthermore, complexity analysis based on the expected number of genes given number of UMIs per cell was also used to filter out poor quality cells. Samples were analyzed using the Seurat workflow (Hao et al 2021). Each sample was normalized using the SingleCellTransform pipeline (Hafemeister & Satija 2019) with method set to “glmGamPoi” (Ahlmann-Eltze & Huber 2020). 30 principal components were used for graph construction and resolution was set to 0.5 for Louvain clustering (unless otherwise specified). Marker analysis was performed using the Wilcoxon Rank Sum Test as implemented in the Seurat FindAllMarkers() function with “only.pos” set to True. To identify cell types present in each sample, the Seurat anchor-based Label Transfer method was used, based on a single-cell RNA-seq dataset from similar tissue (unless otherwise specified).
Samples were integrated using the RISC package (Liu, Zheng et al Nat Biotech 2021). 30 PCs were used for integration as well as for post-clustering analysis including Louvain clustering, unless otherwise specified. Compositional analysis to compare proportional abundance of clusters between conditions was performed via the Propeller test as implemented in the speckle package (Phipson et al 2022). Differential expression analysis across conditions followed a pseudobulking approach based on the EdgeR-Likelihood Ratio Test (LRT) method for each cluster (Robinson et al 2010). Pathway analysis on the differentially expressed genes was performed using Gene Set Enrichment Analysis (GSEA) as implemented in the FGSEA package (Subramanian et al 2005), (Korotkevich et al 2021). Pathways were downloaded from the Molecular Signatures Database using the R package msigdbr (Liberzon et al 2015), (Dolgalev 2022).
# Markdown sub-section header and intro sentence for the sample table,
# assembled line by line and printed as-is.
plotlab <- paste(
  c(
    '',
    '### Sample names and conditions',
    'Here we list the sample names, conditions, and code names for each sample.',
    ''
  ),
  collapse = '\n'
)
cat(plotlab)
Here we list the sample names, conditions, and code names for each sample.
# Render the sample metadata table; the value is auto-printed by knitr.
knitr::kable(sample_metadata)
| Sample | Condition | Code |
|---|---|---|
| H00049 | Healthy | Healthy_1 |
| H00053 | Healthy | Healthy_2 |
| S00006 | Covid_Mild | Covid_Mild_1 |
| S00016 | Covid_Mild | Covid_Mild_2 |
| S00040 | Covid_Critical | Covid_Critical_1 |
| S00043 | Covid_Critical | Covid_Critical_2 |
# Markdown sub-section header and explanation for the comparisons table,
# assembled line by line and printed as-is.
plotlab <- paste(
  c(
    '',
    '### Comparisons to perform',
    'We set up a cross-condition comparison using each row of the table below. For example, for each row, the "c1" column is compared with the "c2" column in differential expression analysis.',
    ''
  ),
  collapse = '\n'
)
cat(plotlab)
We set up a cross-condition comparison using each row of the table below. For example, for each row, the “c1” column is compared with the “c2” column in differential expression analysis.
# Render the comparisons table (columns c1 and c2); auto-printed by knitr.
knitr::kable(comps)
| c1 | c2 |
|---|---|
| Covid_Critical | Healthy |
| Covid_Mild | Healthy |
| Covid_Critical | Covid_Mild |
# Markdown sub-section header and intro sentence for the parameter table,
# assembled line by line and printed as-is.
plotlab <- paste(
  c(
    '',
    '### Analysis parameters',
    'Here we list the key analysis parameters used in this analysis.',
    ''
  ),
  collapse = '\n'
)
cat(plotlab)
Here we list the key analysis parameters used in this analysis.
# Reset the row names so the rendered table shows only the parameter and value
# columns, then render it; the value is auto-printed by knitr.
rownames(pldf) <- NULL
knitr::kable(pldf)
| parameter | value |
|---|---|
| datadir | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//datadir// |
| outdir | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//outs/PreRunOut// |
| sample_metadata | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//sample_metadata.csv |
| comps | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//comps.csv |
| use_labeltransfer | TRUE |
| refdatapath | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//labeltransferref/LabelTransferRef_SCTnormalized.rds |
| m_reference | /gs/gsfs0/users/aferrena/data/deyou/scDAPP/data//labeltransferref/LabelTransferRefMarkers.rds |
| min_num_UMI | 500 |
| min_num_Feature | 200 |
| max_perc_mito | 25 |
| max_perc_hemoglobin | 25 |
| autofilter_mito | TRUE |
| autofilter_nUMI | TRUE |
| autofilter_complexity | TRUE |
| autofilter_medianabsolutedev_threshold | 3 |
| autofilter_loess_negative_residual_threshold | -5 |
| doubletFinder | TRUE |
| risc_reference | auto |
| pcs_indi | 30 |
| res_indi | 0.5 |
| pcs_int | 30 |
| res_int | 0.5 |
| RISC_louvain_neighbors | 10 |
| Pseudobulk_mode | TRUE |
| crossconditionDE_padj_thres | 0.1 |
| crossconditionDE_lfc_thres | 0 |
| pathway_padj_thres | 0.1 |
| species | Homo sapiens |
| workernum | 6 |
| input_seurat_obj | TRUE |
First, we preprocess and analyze each individual sample. This will go through the following steps for each sample:
After the analysis of individual samples, we will do an integrated analysis.
### load packages, set seed
library(tidyverse)
library(patchwork) # combine plots
library(RISC)
library(Seurat)
library(scDAPP)
library(DoubletFinder)
library(future)
library(parallel)
library(foreach)
library(glmGamPoi) # for faster SCT
library(ComplexHeatmap) # for heatmaps
library(ggdendro) #for clustering dendrograms
library(ggridges) # qc ridgeplots
library(edgeR)
library(msigdbr) #get pathways (cross species) from msigdb
library(hdf5r) # HARD TO INSTALL: installed thru mamba
library(ggalluvial) # part of alluvial plot
library(ggfittext) # part of alluvial plot
library(ggrepel) # part of alluvial plot
set.seed(2022)
Once sequencing is completed, we get a file that has the barcode and cDNA sequences as reads (a bunch of ATGCs), along with some sequencing quality information. This file is called a “FASTQ” and has the file extension “.fastq”. The downstream analysis relies on analyzing counts of transcripts for each cell. This means we have to do a few things:
Alignment: assign each sequence read to the gene it came from.
Gene demultiplexing: Once the read-pair is aligned to a gene, we count the UMI barcode as a count for that gene. Two reads may come from the same gene, but if they have the same UMI, it means they came from the same transcript, so it only counts as one UMI. This allows us to get around the problem of short-read sequencing inherent to Illumina technology, to count actual transcripts rather than counting reads.
Cell demultiplexing: During 10X library prep, each cell is isolated and given a barcode. Every read from that cell is labelled with the cell barcode. Using the barcode, we assign the read to a specific cell.
These preprocessing steps are performed by the 10x Genomics software called Cellranger. Cellranger produces some outputs including web_summary.html files that allow basic exploration of the sample quality. It is good practice to review these files for each sample.
Downstream analysis, including clustering and label transfer, works with the output of Cellranger Count. This includes a gene by cell matrix. Each row is a gene, each column is a cell and each value is the number of UMIs for each gene from each cell; typically, this is ~20K genes (rows) x ~8K cells (columns). Once that is all complete, we are ready for the analysis performed in this document. We use the Seurat pipeline in R to analyze this data, which involves the steps detailed in section 1.
Flawed datapoints can include poor quality cells and multiplets. Poor quality cells can include cells which have sustained membrane damage and leakage of their RNA, a process which typically results in the observed transcriptomic profile of that cell displaying a high proportion of mitochondrial RNA, or low numbers of UMIs or unique genes. Multiplets can be caused by random error of Poisson loading of single-cells in the 10X microfluidics chamber. These can influence and bias downstream analysis. Occasionally, we also observe red blood cells despite usage of RBC lysis buffer. These should not be in the data and so we remove them as well.
#set up output dir for individual samples
# Output directory for the per-sample analysis.
outdir_indi <- paste0(outdir, '/individualsample_analysis')
dir.create(outdir_indi)

### read in data ###
# Samples are processed in the order of the metadata table; the list of
# objects built below is named by each sample's Code.
samples <- sample_metadata$Sample
names(samples) <- sample_metadata$Code

# Label a freshly loaded Seurat object with the Code of its sample: both the
# project name and orig.ident are set to the Code from sample_metadata.
# Shared by both input modes below.
.label_sample_object <- function(sobj, samp) {
  md_samp <- sample_metadata[sample_metadata$Sample == samp, , drop = FALSE]
  sobj@project.name <- md_samp$Code
  sobj$orig.ident <- md_samp$Code
  sobj
}

# Read from saved Seurat objects (<datadir>/<Sample>.rds, i.e. for hashed
# data) or from Cellranger filtered_feature_bc_matrix.h5 files.
if (input_seurat_obj == TRUE) {

  sobjlist <- lapply(samples, function(samp) {
    message('\nReading in ', samp)

    sobjfile <- paste0(datadir, '/', samp, '.rds')
    sobj <- readRDS(sobjfile)

    .label_sample_object(sobj, samp)
  })

} else {

  sobjlist <- lapply(samples, function(samp) {
    message('\nReading in ', samp)

    datafp <- paste0(datadir, '/', samp)
    # if on hpc, use below
    # datafp <- paste0(datadir, '/', samp, '/outs/')

    # Locate the matrix file anywhere under the sample folder. fixed = TRUE
    # matches the file name literally instead of treating '.' as a wildcard.
    h5_filename <- grep(pattern = 'filtered_feature_bc_matrix.h5',
                        list.files(datafp, recursive = TRUE, full.names = TRUE),
                        value = TRUE, fixed = TRUE)

    # Exactly one file is required: zero or several matches would otherwise
    # surface as an obscure error from inside the reader.
    if (length(h5_filename) != 1) {
      stop('Expected exactly one filtered_feature_bc_matrix.h5 file under ',
           datafp, ' but found ', length(h5_filename),
           if (length(h5_filename) > 1) paste0(': ', paste(h5_filename, collapse = ', ')))
    }

    sobj <- CreateSeuratObject(Read10X_h5(h5_filename),
                               min.cells = 3)

    .label_sample_object(sobj, samp)
  })

}
#add in qc values for mito and hemoglobin
# Annotate every object with per-cell QC metrics: percent mitochondrial
# counts, percent hemoglobin counts and, when it works, cell-cycle scores.
sobjlist <- lapply(sobjlist, function(obj) {

  # mitochondrial genes: names starting with "mt-", case-insensitive
  mito_genes <- grep(pattern = "^mt-", x = rownames(x = obj),
                     value = TRUE, ignore.case = TRUE)
  obj[["percent.mito"]] <- Seurat::PercentageFeatureSet(obj, features = mito_genes)

  # hemoglobin content
  obj$percent.hemoglobin <- scDAPP::calculate_percent.hemoglobin(obj)

  # cell-cycle scoring is best effort: it sometimes fails, in which case the
  # error is reported by try() and the object is kept without the scores
  scored <- try(
    CellCycleScoring(obj,
                     s.features = Seurat::cc.genes.updated.2019$s.genes,
                     g2m.features = Seurat::cc.genes.updated.2019$g2m.genes)
  )
  if (!inherits(scored, 'try-error')) {
    obj <- scored
  }

  obj
})
#here we do most of the filtering
# Output locations for the filtering step: unfiltered objects, processed
# objects, QC reports, and a temp folder for the per-sample inputs.
rawobjsdir <- paste0(outdir_indi, '/unfiltered_Seurat_objects')
outdir_indi_seuratobjs <- paste0(outdir_indi, '/processed_Seurat_objects')
qcdir <- paste0(outdir_indi, '/qualitycontrol_filtering')
qctmpdir <- paste0(qcdir, '/qctmpdir/')

for (newdir in c(rawobjsdir, outdir_indi_seuratobjs, qcdir, qctmpdir)) {
  dir.create(newdir, recursive = TRUE)
}

# Write each object to <qctmpdir>/<Code>.rds so that the per-sample processing
# can read one object at a time instead of holding the whole list in memory.
invisible(lapply(sobjlist, function(obj) {
  saveRDS(obj, paste0(qctmpdir, '/', obj@project.name, '.rds'))
}))

# Drop the in-memory list and release the memory.
rm(sobjlist)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
#actual processing steps
#0. read in each sample from temp
#1. normalize and cluster raw data w/o filter
#2. apply autofilter
#3. filter out initial auto filter
#4. IF DF == T: renormalize, recluster, apply doubletfinder, refilter
#5. renormalize, recluster with real clustering parameters
#6. add all filter out annotation to raw sobj
#7. do some analysis on raw and save it
#8. save files (raw and filtered/processed)
#9. return autofilter results
# Start a cluster of `workernum` workers and register it as the foreach
# backend for the per-sample processing loop that follows.
# NOTE(review): no stopCluster(cl) is visible in this part of the file -
# confirm the cluster is shut down after the loop.
cl <- parallel::makeCluster(workernum)
doParallel::registerDoParallel(cl)
# Sample codes. The foreach call below iterates sample_metadata$Code directly,
# so `codes` only matters if it is used further down or when subsetting
# manually as in the commented line.
codes <- sample_metadata$Code
# codes <- codes[11:17] troubleshoot particular samples
# af_md_list <- lapply(codes, function(code){
af_md_list <- foreach(code = sample_metadata$Code,
.packages = c('Seurat', 'ggplot2', 'dplyr',
'scDAPP','grid', 'irlba', 'Matrix'), .verbose = T) %dopar%
{
message(code)
#read in sobj
sobj <- readRDS(paste0(qctmpdir, '/', code, '.rds'))
#1. normalize and cluster raw data w/o filter
#normalize and cluster
suppressWarnings(sobj <- Seurat::SCTransform(sobj, verbose = T, method="glmGamPoi"))
sobj <- Seurat::RunPCA(object = sobj, verbose = F)
sobj <- Seurat::FindNeighbors(object = sobj, dims = 1:30, verbose = F)
sobj <- Seurat::FindClusters(object = sobj, resolution = 0.1, verbose = F, algorithm = 1)
sobj <- Seurat::RunUMAP(sobj, dims = 1:30)
#2. apply autofilter
af <- scDAPP::autofilter(sobj,
min_num_UMI = min_num_UMI,
min_num_Feature = min_num_Feature,
max_perc_mito = max_perc_mito,
max_perc_hemoglobin = max_perc_hemoglobin,
globalfilter.complexity = autofilter_complexity,
globalfilter.mito = autofilter_mito,
globalfilter.libsize = autofilter_nUMI,
mad.score.threshold = autofilter_medianabsolutedev_threshold,
loess_negative_residual_threshold = autofilter_loess_negative_residual_threshold
)
#3. filter out initial auto filter
#name unfiltered object as sobjraw, and filtered as sobj for now and sobjsave after
sobjraw <- sobj
cellstatus <- af$cellstatus
goodcells <- cellstatus[cellstatus$filteredout==F,"barcodes"]
sobj <- sobj[,goodcells]
#4. IF DF == T: renormalize, recluster, apply doubletfinder, refilter
if(doubletFinder == T){
#filter, re-proc
### temporary fix for doubletfinder v2.0.3 and Seurat v5: coerce seurat to "v3" instead of v5 object
if( (packageVersion('DoubletFinder') == "2.0.3") & (packageVersion('Seurat') >= "5.0.0") ){
## try to make it a old seurat object...
sobj_df <- GetAssayData(sobj, assay = 'SCT', layer = 'data')
sobj_df <- CreateAssayObject(sobj_df)
sobj_df <- CreateSeuratObject(sobj_df)
warning('Will attempt to coerce v5 Seurat object to work with DoubletFinder v2.0.3; this is unstable and does not always work! If any errors arise, set doubletFinder to FALSE in pipeline runner')
} else{sobj_df <- sobj}
#normalize and cluster
suppressWarnings(sobj_df <- Seurat::SCTransform(sobj_df, verbose = T, method="glmGamPoi"))
sobj_df <- Seurat::RunPCA(object = sobj_df, verbose = F)
sobj_df <- Seurat::FindNeighbors(object = sobj_df, dims = 1:30, verbose = F)
sobj_df <- Seurat::FindClusters(object = sobj_df, resolution = 0.1, verbose = F, algorithm = 1)
sobj_df <- RunUMAP(sobj_df, dims = 1:30)
try(expr = {
### run DF
af <- scDAPP::doubletfinderwrapper(sobj_df,
autofilterres = af,
num.cores = 1)
#filter, re-proc
cellstatus <- af$cellstatus
goodcells <- cellstatus[cellstatus$filteredout==F,"barcodes"]
sobj <- sobj[,goodcells]
})
rm(sobj_df); gc(full = T)
}
#previously we did main analysis here, for ease of code reading
# we'll do it later in the clustering section
# MAKE SURE RAW PREFILT CLUSTERS ARE LABELLED APPROPRIATELY IN SOBJSAVE
colnames(sobj@meta.data)[grepl('SCT_snn_res.0.1', colnames(sobj@meta.data))] <- 'PREFILTER_SCT_snn_res.0.1'
#name filtered object as sobjsave
sobjsave <- sobj ; rm(sobj)
#7. do some analysis on raw and save it
#do a bit of analysis
#add af cell status to sobjraw md
sobjraw@meta.data <- cbind(sobjraw@meta.data, af$cellstatus[,-1])
#find markers; do not futurize, it breaks everything
m <- FindAllMarkers(sobjraw, only.pos = T)
# as of Nov 9 2023 (Seurat v5): add score to markers
m$score <- (m$pct.1 - m$pct.2) * m$avg_log2FC
#prep genes
n <- 5
top <- m %>% group_by(cluster) %>% top_n(n = n, wt = score)
#make some plots
d_rawclust <- DimPlot(sobjraw, group.by = 'seurat_clusters', label = T, repel = T)+ggtitle('Unfiltered data clusters', subtitle = 'Louvain res = 0.1')
d_raw_filt <- DimPlot(sobjraw, group.by = 'filteredout', label = F, repel = T)
sobjraw$filterreason <- factor(sobjraw$filterreason, levels = names(sort(table(sobjraw$filterreason), decreasing = T)))
d_raw_filt_reason <- DimPlot(sobjraw, group.by = 'filterreason', label = F, repel = T)
fp_raw_qc <- FeaturePlot(sobjraw, c('nCount_RNA', 'nFeature_RNA',
'percent.mito', 'percent.hemoglobin'),
order = T)
#prep per-cluster filter numbers
tab_filt_by_clust <- table(sobjraw$filterreason, sobjraw$seurat_clusters)
tab_filt_by_clust <- t(tab_filt_by_clust)
rownames(tab_filt_by_clust) <- paste0('cluster_', rownames(tab_filt_by_clust))
colnames(tab_filt_by_clust) <- gsub(x = colnames(tab_filt_by_clust),
pattern = '\\.', '\n')
hm_raw <- DoHeatmap(sobjraw, top$gene, raster = F)+NoLegend() + labs(title = "Pre-filter cluster markers")
# d_filt_clust <- DimPlot(sobjraw, group.by = newclustname, label = T, repel = T)+ggtitle('Filtered data clusters')
#alluvial plot: prep colors and make sure order is hi to lo
sobjraw$filterreason <- factor(sobjraw$filterreason, levels = names(sort(table(sobjraw$filterreason), decreasing = T)))
pal <- grDevices::colorRampPalette(RColorBrewer::brewer.pal('Dark2', n = 8))(length(levels(sobjraw$seurat_clusters)))
ap_filt <- alluvialplot(sobjraw@meta.data[,c('seurat_clusters', 'filteredout')])+
scale_fill_manual(values = pal)+
labs(title = 'Cluster filtering')
ap_filt_reason <- alluvialplot(sobjraw@meta.data[,c('seurat_clusters', 'filterreason')])+
scale_fill_manual(values = pal)+
labs(title = 'Cluster filtering reason')
#add some basic filter vln plots
comm <- af$allcommands
rownames(comm) <- comm$Command
af$vln_umi <- VlnPlot(sobjraw, 'nCount_RNA', group.by = 'orig.ident')+
scale_y_log10(labels = scales::label_comma())+
geom_hline(yintercept = comm['min_num_UMI', 2],
linetype = 'dotted')+
labs(caption = paste0("cutoff = ", comm['min_num_UMI', 2]))
af$vln_feature <- VlnPlot(sobjraw, 'nFeature_RNA', group.by = 'orig.ident')+
scale_y_log10(labels = scales::label_comma())+
geom_hline(yintercept = comm['min_num_Feature', 2],
linetype = 'dotted')+
labs(caption = paste0("cutoff = ", comm['min_num_Feature', 2]))
af$vln_mito <- VlnPlot(sobjraw, 'percent.mito', group.by = 'orig.ident')+
geom_hline(yintercept = comm['max_perc_mito', 2],
linetype = 'dotted')+
labs(caption = paste0("cutoff = ", comm['max_perc_mito', 2]))
af$vln_hemo <- VlnPlot(sobjraw, 'percent.hemoglobin', group.by = 'orig.ident')+
geom_hline(yintercept = comm['max_perc_hemoglobin', 2],
linetype = 'dotted')+
labs(caption = paste0("cutoff = ", comm['max_perc_hemoglobin', 2]))
#add to autofilter
af$d_rawclust <- d_rawclust
af$d_raw_filt <- d_raw_filt
af$d_raw_filt_reason <- d_raw_filt_reason
af$fp_raw_qc <- fp_raw_qc
af$tab_filt_by_clust <- tab_filt_by_clust
af$hm_raw <- hm_raw
#af$d_filt_clust <- d_filt_clust
af$ap_filt <- ap_filt
af$ap_filt_reason <- ap_filt_reason
#change colnames for baseline sumary
colnames(af$baseline_qc_summary) <- gsub("summary_", "summary\n", colnames(af$baseline_qc_summary))
#save it all
# save the raw objects
saveRDS(sobjraw, paste0(rawobjsdir, '/Unfiltered-SeuratObject-', code, '.rds'))
#save the autofilter as a nice pdf
afpdf <- paste0(qcdir, '/QC_autofilter_summary-', code,'.pdf')
pdf(afpdf, height = 10, width = 10)
pdftable(af$filtersummary, title = 'Cell Filtering Summary')
pdftable(af$allcommands, title = 'Filter parameters')
pdftable(round(af$baseline_qc_summary, 2), title = 'QC summary stats')
print(af$vln_umi)
print(af$vln_feature)
print(af$vln_mito)
print(af$vln_hemo)
print(af$globalfilter.complexity)
print(af$globalfilter.libsize)
print(af$globalfilter.mito)
print(af$d_rawclust)
print(af$d_raw_filt)
print(af$d_raw_filt_reason)
print(af$fp_raw_qc)
pdftable(af$tab_filt_by_clust, title = 'Cell filtering per cluster')
print(af$hm_raw)
# print(af$d_filt_clust)
print(af$ap_filt)
print(af$ap_filt_reason)
dev.off()
#8. save procesed object
#code <- sobj@project.name
sobjfile <- paste0(qctmpdir, '/', code, '.rds')
saveRDS(sobjsave, sobjfile)
#9. return autofilter and raw md
rawmd <- sobjraw@meta.data
rm(sobjsave, sobjraw)
invisible(gc(full = T, reset = F, verbose = F))
list(af, rawmd)
} # for foreach
# }) # for lapply
# All samples are processed; shut the worker cluster down.
parallel::stopCluster(cl)
# Every element of af_md_list is a two-element list:
#   [[1]] the autofilter results, [[2]] the unfiltered cell metadata.
# Split them into two parallel lists.
aflist <- lapply(af_md_list, `[[`, 1)
mdlist <- lapply(af_md_list, `[[`, 2)
# key both lists by sample code
names(aflist) <- sample_metadata$Code
names(mdlist) <- sample_metadata$Code
# the combined list is now redundant; drop it and reclaim memory
rm(af_md_list)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
### prepare some summary plots for filtering
# #from autofilter list, get baseline summaries and cell filter stats
# Build one table per baseline QC metric: rows are samples, columns are the
# summary statistics stored in each sample's `baseline_qc_summary`.
cn <- colnames(aflist[[1]]$baseline_qc_summary)
bsl <- lapply(cn, function(metric) {
  # one single-column slice per sample, renamed to the sample code
  per_sample <- lapply(seq_along(aflist), function(i) {
    one_col <- aflist[[i]]$baseline_qc_summary[, metric, drop = FALSE]
    colnames(one_col) <- names(aflist)[i]
    one_col
  })
  # bind samples side by side, then transpose so that samples become rows
  t(dplyr::bind_cols(per_sample))
})
# the metric names carry a "summary\n" prefix; strip it for the list names
cn <- gsub(x = cn, 'summary\n', '')
names(bsl) <- cn
#from outlier list, filter summary
# Assemble the cell-filter summary: one row per sample, one column per
# filter category.

# From each sample's `filtersummary`, keep column 2, row-named by column 1
# and column-named by the sample code.
fs <- lapply(seq_along(aflist), function(i) {
  filt <- aflist[[i]]$filtersummary
  rownames(filt) <- filt[, 1]
  filt <- filt[, 2, drop = FALSE]
  colnames(filt) <- names(aflist)[i]
  filt
})
# sometimes a sample has no cells removed for some category, so that row is
# absent; collect the union of categories and pad the missing ones with 0
remnames <- unique(unlist(lapply(fs, rownames)))
fs <- lapply(fs, function(filt) {
  missingfilts <- setdiff(remnames, rownames(filt))
  if (length(missingfilts) > 0) {
    adddf <- data.frame(samp = rep(0, length(missingfilts)))
    colnames(adddf) <- colnames(filt)
    rownames(adddf) <- missingfilts
    filt <- rbind(filt, adddf)
  }
  # make sure all samples list the categories in the same order
  filt[match(remnames, rownames(filt)), , drop = FALSE]
})
# bind table: samples as rows
fs <- dplyr::bind_cols(fs)
fs <- t(fs)
# shorten names
colnames(fs) <- gsub('globalfilter', 'auto', colnames(fs))
colnames(fs) <- gsub('DoubletFinder_doublet', 'Doublet', colnames(fs))
# rearrange: total cells, unfiltered, then the filtered categories.
# Select the leading columns by NAME. The category order comes from the union
# above, so a category missing from the first sample is appended after
# 'Total', and picking "the last two columns" then grabs the wrong ones.
# drop = FALSE keeps a one-sample (single-row) table as a matrix.
lead <- c('Total', 'Unfiltered')
if (all(lead %in% colnames(fs))) {
  fs <- fs[, c(lead, setdiff(colnames(fs), lead)), drop = FALSE]
} else {
  # names not found: fall back to the positional rule (last two columns first)
  ntot <- ncol(fs)
  tail_idx <- c(ntot, ntot - 1)
  fs <- fs[, c(tail_idx, setdiff(seq_len(ntot), tail_idx)), drop = FALSE]
}
## also prep some summary plots for each sample ##
### pre-filt plots
## Per-sample QC ridge plots, before and after filtering ##

# Build one binned ridgeline plot of a QC metric, one ridge per sample.
#   md      cell-level metadata containing `qc_var` and `orig.ident`
#   qc_var  name of the metadata column to plot (string)
#   limits  x-axis limits, passed to scale_x_log10()
#   offset  constant added to the metric before the log10 axis
#           (1 is used for the percentage metrics, which can be exactly 0)
# Returns the ggplot object.
# The plot is built inside a function on purpose: aes() is evaluated lazily,
# when the plot is printed. With a single top-level `var` that is reassigned
# for every metric, each `.data[[var]]` would be resolved with whatever value
# `var` holds at print time rather than the one intended for that plot. Here
# every call has its own `qc_var` and `offset`.
ridge_qc_plot <- function(md, qc_var, limits, offset = 0) {
  force(qc_var)
  force(offset)
  ggplot(md, aes(x = .data[[qc_var]] + offset, y = orig.ident,
                 fill = after_stat(log10(x)))) +
    geom_density_ridges_gradient(scale = 0.9, stat = "binline", bins = 50) +
    scale_x_log10(labels = scales::label_comma(), name = qc_var,
                  limits = limits) +
    # list the samples top to bottom in sample_metadata order
    scale_y_discrete(limits = rev(sample_metadata$Code)) +
    viridis::scale_fill_viridis(option = "C", alpha = 0.7) +
    theme_ridges() + theme(legend.position = 'none')
}

### pre-filter plots
combmd <- dplyr::bind_rows(mdlist)
# axis limits are taken from the unfiltered data and reused for the
# post-filter plots so both sets share the same x-axis
umilims <- range(combmd$nCount_RNA)
featlims <- range(combmd$nFeature_RNA)

dens_UMI <- ridge_qc_plot(combmd, "nCount_RNA", umilims)
dens_feature <- ridge_qc_plot(combmd, "nFeature_RNA", featlims)
dens_mito <- ridge_qc_plot(combmd, "percent.mito", c(1, 100), offset = 1)
dens_hemo <- ridge_qc_plot(combmd, "percent.hemoglobin", c(1, 100), offset = 1)

# Per-sample peak of the hemoglobin density on the unfiltered data. No active
# plot uses it; it is kept because the clean-up step further down removes
# `submd` and `maxdens` by name.
submd <- combmd[, c("percent.hemoglobin", 'orig.ident')]
colnames(submd)[1] <- 'var'
maxdens <- aggregate(var ~ orig.ident, submd, function(x) {
  max(density(x)$y)
})

### post-filter plots
# keep only the cells that passed filtering
combmd <- combmd[combmd$filteredout == FALSE, ]

post_dens_UMI <- ridge_qc_plot(combmd, "nCount_RNA", umilims)
post_dens_feature <- ridge_qc_plot(combmd, "nFeature_RNA", featlims)
post_dens_mito <- ridge_qc_plot(combmd, "percent.mito", c(1, 100), offset = 1)
post_dens_hemo <- ridge_qc_plot(combmd, "percent.hemoglobin", c(1, 100),
                                offset = 1)
### prepare a whole dataset qc summary pdf
# adjust filter summary table colnames, too long
# Shorten the filter-summary column names by dropping the "auto." prefix.
# fixed = TRUE matches the literal text; as a regex the "." would match any
# character and could strip unrelated text such as "autoX".
colnames(fs) <- gsub('auto.', '', colnames(fs), fixed = TRUE)
allsampfiltsumm <- paste0(qcdir, '/AllSamples_QC_Summary.pdf')
# add plot titles (the titled plots are reused when the report prints them)
dens_UMI <- dens_UMI + ggtitle('Pre-Filter UMI distribution plot')
post_dens_UMI <- post_dens_UMI + ggtitle('Post-Filter UMI distribution plot')
dens_feature <- dens_feature + ggtitle('Pre-Filter Unique Gene distribution plot')
post_dens_feature <- post_dens_feature + ggtitle('Post-Filter Unique Gene distribution plot')
dens_mito <- dens_mito + ggtitle('Pre-Filter Percent Mito distribution plot')
post_dens_mito <- post_dens_mito + ggtitle('Post-Filter Percent Mito distribution plot')
dens_hemo <- dens_hemo + ggtitle('Pre-Filter Percent Hemoglobin distribution plot')
post_dens_hemo <- post_dens_hemo + ggtitle('Post-Filter Percent Hemoglobin distribution plot')
# Write the all-sample QC summary: the filter table first, then for each
# metric its summary-statistics table and the pre- and post-filter plots.
pdf(allsampfiltsumm, height = 7, width = 7)
print(pdftable(fs, title = "Cell Filter Summary (cell numbers)"))
print(pdftable(round(bsl$nCount_RNA, 1), title = 'Pre-Filter UMI distribution'))
print(dens_UMI)
print(post_dens_UMI)
print(pdftable(round(bsl$nFeature_RNA, 1), title = 'Pre-Filter Unique Gene distribution'))
print(dens_feature)
print(post_dens_feature)
print(pdftable(round(bsl$perc.mito, 1), title = 'Pre-Filter Percent Mito distribution'))
print(dens_mito)
print(post_dens_mito)
print(pdftable(round(bsl$perc.hemoglobin, 1), title = 'Pre-Filter Percent Hemoglobin distribution'))
print(dens_hemo)
print(post_dens_hemo)
dev.off()
Sometimes poor quality cells can occur in the data due to cell membrane damage during sample preparation, etc. This can bias the downstream analysis.
cat("\n\n
Below we show the number of cells removed with filtering. BasicFilter refers to cells failing a minimum threshold: by default, cells must have >= 500 UMIs, 200 unique genes, and <= 25% mitochondria and hemoglobin content.
Auto.complexity refers to outliers from a regression analysis modelling number of genes by number of UMIs, or the 'complexity' of the cell. Specifically we model the log of each of these, where the relationship is very close to linear. We use a double-regression strategy of both linear and loess regression, and outlier cells must have both a high linear regression Cook's distance and very low Loess negative residuals. This means that cells with a lower than expected number of genes given the number of UMIs are filtered out. Typically, this captures poor quaity cells of extreme low-complexity realy cells such as RBCs.
Auto.libsize refers to cells identified as very low outliers based on median absolute deviation from the general distribtuion of UMIs. Auto.mito refers to cells identified as high outliers based on median absolute deviation from the general distribtuion of mitochondrial content.")
Below we show the number of cells removed with filtering. BasicFilter refers to cells failing a minimum threshold: by default, cells must have >= 500 UMIs, 200 unique genes, and <= 25% mitochondria and hemoglobin content.
Auto.complexity refers to outliers from a regression analysis modelling number of genes by number of UMIs, or the ‘complexity’ of the cell. Specifically we model the log of each of these, where the relationship is very close to linear. We use a double-regression strategy of both linear and loess regression, and outlier cells must have both a high linear regression Cook’s distance and very low Loess negative residuals. This means that cells with a lower than expected number of genes given the number of UMIs are filtered out. Typically, this captures poor-quality cells of extremely low complexity, such as RBCs.
Auto.libsize refers to cells identified as very low outliers based on median absolute deviation from the general distribution of UMIs. Auto.mito refers to cells identified as high outliers based on median absolute deviation from the general distribution of mitochondrial content.
knitr::kable(fs)
| | BasicFilter | Total | libsize | mito | Unfiltered |
|---|---|---|---|---|---|
| Healthy_1 | 0 | 800 | 24 | 26 | 750 |
| Healthy_2 | 1 | 800 | 37 | 20 | 742 |
| Covid_Mild_1 | 2 | 800 | 25 | 17 | 756 |
| Covid_Mild_2 | 2 | 707 | 16 | 22 | 667 |
| Covid_Critical_1 | 1 | 684 | 3 | 31 | 649 |
| Covid_Critical_2 | 1 | 800 | 7 | 48 | 744 |
cat('\n\n
Here we show details of the filtering approach, including the minimum UMI and unique gene cutoffs, maximum percent mito and percent hemoglobin cutoffs. Additionally, we show paramters for the sample-wise cutoffs including median absolute deiation (mad) score threshold, and loess residual threshold. These are used to make tighter cutoffs in an automated, sample-by-sample basis. The actual sample wise cutoffs are provided for each sample in a report in the QC folder called "qualitycontrol_filtering".')
Here we show details of the filtering approach, including the minimum UMI and unique gene cutoffs, maximum percent mito and percent hemoglobin cutoffs. Additionally, we show parameters for the sample-wise cutoffs including median absolute deviation (mad) score threshold, and loess residual threshold. These are used to make tighter cutoffs in an automated, sample-by-sample basis. The actual sample-wise cutoffs are provided for each sample in a report in the QC folder called “qualitycontrol_filtering”.
knitr::kable( aflist[[1]]$allcommands )
| Command | Option |
|---|---|
| mad.score.threshold | 3 |
| loess_negative_residual_threshold | -5 |
| min_num_UMI | 500 |
| min_num_Feature | 200 |
| max_perc_mito | 25 |
| max_perc_hemoglobin | 25 |
| globalfilter.complexity | 1 |
| globalfilter.libsize | 1 |
| globalfilter.mito | 1 |
Unique Molecular Identifiers (UMIs) are sequence labels that are attached to each transcript during library prep, which allow for counting unique mRNA transcripts in 10X data.
cat('Here we show the summary statistics for the distributions of UMIs in each sample before filtering.')
Here we show the summary statistics for the distributions of UMIs in each sample before filtering.
knitr::kable(bsl$nCount_RNA)
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| Healthy_1 | 512 | 2315.00 | 3052.5 | 3359.045 | 4054.00 | 19994 |
| Healthy_2 | 497 | 2920.25 | 3890.5 | 4200.469 | 4972.50 | 17778 |
| Covid_Mild_1 | 451 | 2217.25 | 2874.5 | 3196.146 | 3814.75 | 41783 |
| Covid_Mild_2 | 486 | 2156.00 | 2932.0 | 3312.269 | 3847.00 | 44034 |
| Covid_Critical_1 | 487 | 2631.75 | 3947.5 | 4475.493 | 5612.75 | 39160 |
| Covid_Critical_2 | 461 | 2262.50 | 3287.5 | 4342.238 | 4559.50 | 54956 |
cat('#### UMI pre-filter distribution
Here we plot the distribution of UMIs per cell for all samples before filtering.')
Here we plot the distribution of UMIs per cell for all samples before filtering.
print(dens_UMI)
cat('#### UMI post-filter distribution
Here we plot the distribution of UMIs per cell for all samples after filtering.')
Here we plot the distribution of UMIs per cell for all samples after filtering.
print(post_dens_UMI)
cat('Here we show the summary statistics for the distributions of genes (or "features", as they are reffered to in Seurat and machine learning jargon) in each sample before filtering.')
Here we show the summary statistics for the distributions of genes (or “features”, as they are referred to in Seurat and machine learning jargon) in each sample before filtering.
knitr::kable(bsl$nFeature_RNA)
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| Healthy_1 | 331 | 915.75 | 1103.5 | 1150.872 | 1319.25 | 2994 |
| Healthy_2 | 346 | 1150.75 | 1371.5 | 1407.696 | 1618.00 | 3550 |
| Covid_Mild_1 | 303 | 826.50 | 1046.5 | 1081.787 | 1259.25 | 5834 |
| Covid_Mild_2 | 303 | 950.00 | 1202.0 | 1241.434 | 1457.00 | 5096 |
| Covid_Critical_1 | 302 | 919.00 | 1204.5 | 1288.404 | 1553.25 | 4571 |
| Covid_Critical_2 | 339 | 882.00 | 1165.5 | 1338.001 | 1549.75 | 6095 |
cat('#### Unique Feature pre-filter distribution
Here we plot the distribution of features per cell for all samples before filtering.')
Here we plot the distribution of features per cell for all samples before filtering.
print(dens_feature)
cat('#### Unique Feature post-filter distribution
Here we plot the distribution of features per cell for all samples after filtering.')
Here we plot the distribution of features per cell for all samples after filtering.
print(post_dens_feature)
The percent of mitochondrial content is used as a metric of cell quality in single-cell data. If cells are damaged during handling, they often sustain membrane tearing, which causes cytoplasmic RNA to leak, while mitochondria and mitochondrial RNA is retained. Thus, poor quality, damaged cells often have enriched mt-RNA.
cat('Here we show the summary statistics for the distributions of mitochondrial content in each sample before filtering.')
Here we show the summary statistics for the distributions of mitochondrial content in each sample before filtering.
knitr::kable(bsl$perc.mito)
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| Healthy_1 | 0.0787608 | 2.7055380 | 3.576703 | 3.754190 | 4.527047 | 9.525773 |
| Healthy_2 | 0.0572082 | 2.3795023 | 3.099182 | 3.325760 | 3.961888 | 9.483248 |
| Covid_Mild_1 | 0.1339457 | 2.4872762 | 3.381768 | 3.670444 | 4.579035 | 9.745982 |
| Covid_Mild_2 | 0.3158310 | 1.4212632 | 2.255410 | 2.610707 | 3.327504 | 9.384615 |
| Covid_Critical_1 | 0.0000000 | 1.5955738 | 2.278323 | 2.567974 | 3.159081 | 8.747220 |
| Covid_Critical_2 | 0.0000000 | 0.9512908 | 1.452882 | 1.883709 | 2.310267 | 9.349956 |
cat('#### Mito pre-filter distribution
Here we plot the distribution of mitochondrial content per cell for all samples before filtering.')
Here we plot the distribution of mitochondrial content per cell for all samples before filtering.
print(dens_mito)
cat('#### Mito post-filter distribution
Here we plot the distribution of mitochondrial content per cell for all samples after filtering.')
Here we plot the distribution of mitochondrial content per cell for all samples after filtering.
print(post_dens_mito)
Hemoglobin is expressed strongly but not exclusively by red blood cells (RBCs), which are typically removed during data generation by RBC lysis buffer, because they are extremely numerous and can overwhelm the tissue cells of interest. However, many samples will often still contain some RBCs (or many, if sample quality is poor). Removing them allows focusing in on the cells of interest.
cat('Here we show the summary statistics for the distributions of hemoglobin content in each sample before filtering.')
Here we show the summary statistics for the distributions of hemoglobin content in each sample before filtering.
knitr::kable(bsl$perc.hemoglobin)
| | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|
| Healthy_1 | 0 | 0 | 0 | 0.0015999 | 0 | 0.2697842 |
| Healthy_2 | 0 | 0 | 0 | 0.0013343 | 0 | 0.0652316 |
| Covid_Mild_1 | 0 | 0 | 0 | 0.0013297 | 0 | 0.1336898 |
| Covid_Mild_2 | 0 | 0 | 0 | 0.0053219 | 0 | 0.3181336 |
| Covid_Critical_1 | 0 | 0 | 0 | 0.0013056 | 0 | 0.0634921 |
| Covid_Critical_2 | 0 | 0 | 0 | 0.0007167 | 0 | 0.0833333 |
cat('#### Hemoglobin pre-filter distribution
Here we plot the distribution of hemoglobin content per cell for all samples before filtering.')
Here we plot the distribution of hemoglobin content per cell for all samples before filtering.
print(dens_hemo)
cat('#### Hemoglobin post-filter distribution
Here we plot the distribution of hemoglobin content per cell for all samples after filtering.')
Here we plot the distribution of hemoglobin content per cell for all samples after filtering.
print(post_dens_hemo)
As stated above, below we detail the analysis for each sample, which includes:
Preprocessing: Normalize and scale gene expression with the Seurat SingleCellTransform (SCT) pipeline
Dimension reduction: Principal component analysis, graph construction, clustering, and visualization via UMAP. Ideally, this step requires some hyperparameter selection, including selecting the number of PCs and Louvain clustering resolution
Marker analysis: using a differential expression test, find the unique marker genes of each cluster
Label transfer: from a reference single-cell dataset, quantify how much each cell resembles a known cell type from a reference dataset, such as one derived from a paper or single-cell database
Principal Component Analysis (PCA) is important for downstream analysis including clustering and visualization with non-linear dimension reduction such as UMAP.
PCA finds high-dimensional planes which vary strongly across the cells. Essentially, each PC consists of very highly correlated genes. The first PC specifically can be thought of as a list of genes that together drive the most variance across cells. Each following PC has less variance.
PCA is used to help de-noising the data for downstream tasks like clustering. Individual genes can be noisy, but groups of correlated genes are less noisy.
An important caveat of PCA is that it is a linear method, in that the PC axes it finds are straight lines. This is okay for denoising genes but can often fail to capture the complexity of single-cell RNA-seq and other types of high dimensional genomic datasets. This is why the analysis does not stop at PCA but includes other more complex non-linear methods described below.
One important parameter for downstream analysis is the selection of how many PCs to use. We want to select the PCs that explain a sufficient amount of variance in the data. One way to do that is via “elbow plots” of each PC versus the standard deviation. The cutoff is made at the PC at which the SD stabilizes and becomes horizontal.
Including too few PCs can mean missing important sources of variations downstream and may result in for example, cell types being merged together in a single cluster. Conversely, including too many PCs can introduce noise into the data and result in clusters not easily explained by biology.
By default, we set the number of PCs to use as 30, which will be appropriate for most samples and is recommended as a default by the developers of SingleCellTransform. If the “Elbow” in the elbow plots strongly deviates from this, a different value may be selected and the analysis can be rerun later.
#clean up env
# Drop everything that was only needed for the QC summary (tables, plots,
# axis limits, intermediate metadata), then reclaim the memory.
qc_summary_objects <- c(
  "aflist", "bsl", "combmd",
  "dens_feature", "post_dens_feature",
  "dens_hemo", "post_dens_hemo",
  "dens_mito", "post_dens_mito",
  "dens_UMI", "post_dens_UMI",
  "fs", "maxdens", "mdlist",
  "submd", "featlims", "umilims"
)
# the name vector itself is removed in the same call
rm(list = c(qc_summary_objects, "qc_summary_objects"))
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
#test if force_redo is T or if saved sobj result does not exist
## if test == T, do it, if not skip it
### WILL IMPLEMENT LATER
### may implement later: read in serial, process parallel?
# readin serial, split up according to num workers
# https://stackoverflow.com/questions/3318333/split-a-vector-into-chunks
# x = sample_metadata$code; split(x, ceiling(seq_along(x)/workernum))
# force_redo_test <- T
#read in data and run PCA
# For every sample: load the object saved to the temporary QC directory,
# normalize with SCTransform (glmGamPoi, v2 flavor) on the RNA assay and run
# PCA on the resulting SCT assay.
sobjlist <- lapply(sample_metadata$Code, function(code) {
  sobj <- readRDS(paste0(qctmpdir, '/', code, '.rds'))
  DefaultAssay(sobj) <- 'RNA'
  sobj <- SCTransform(sobj, assay = "RNA", verbose = verbose,
                      method = 'glmGamPoi', vst.flavor = 'v2')
  RunPCA(sobj, assay = "SCT", verbose = verbose)
})
names(sobjlist) <- sample_metadata$Code
# every object is now in memory, so the temporary directory can be deleted
unlink(qctmpdir, recursive = TRUE)
## check elbow plot
## check elbow plot
# One elbow plot (first 50 PCs) per sample, titled with the sample's
# project name.
elbowplots <- lapply(sobjlist, function(sobj) {
  ElbowPlot(sobj, ndims = 50) + ggtitle(sobj@project.name)
})
# plot one at a time rather than side by side, label was getting cut off
# patchwork::wrap_plots(elbowplots)
# The dotted red line marks the number of PCs used downstream (pcs_indi).
# seq_along() is used so that an empty list is simply skipped;
# 1:length(x) would iterate over c(1, 0) and fail.
for (i in seq_along(elbowplots)) {
  print(elbowplots[[i]] +
          geom_vline(xintercept = pcs_indi, linetype = 'dotted', color = 'red'))
}
Once PCA is completed and the number of critical PCs is selected, we perform a number of steps that all involve grouping cells together based on shared transcriptomic patterns:
k-nearest neighbor graph construction
Louvain clustering
Non-linear dimensionality reduction for visualization, such as t-SNE or UMAP
This process allows us to group cells together based on transcriptomic similarity. Louvain clustering optionally allows for input of a hyperparameter called “resolution,” with high resolution finding larger numbers of clusters. By default, we set this value to 0.5.
#clean env
# the elbow plots have already been printed; drop them and reclaim memory
rm(elbowplots)
invisible(gc(full = T, reset = F, verbose = F))
# ## using ndims_vec, continue individual sample pre-processing.
#
# Calculate graph, clustering, and umap.
# For every sample: build the neighbor graph on the PCA reduction, cluster at
# resolution `res_indi`, and compute the UMAP.
sobjlist <- lapply(sobjlist, function(sobj) {
  # the same number of PCs (pcs_indi) is used for every sample;
  # sample-wise PC selection via ndims_vec is currently disabled
  pc_dims <- c(1:pcs_indi)
  sobj <- FindNeighbors(sobj, reduction = "pca", dims = pc_dims,
                        verbose = verbose)
  sobj <- FindClusters(sobj, verbose = verbose, resolution = res_indi)
  # standard UMAP only (an optional 3D UMAP is not computed)
  RunUMAP(sobj, reduction = "pca", dims = pc_dims, verbose = verbose)
})
# ```
# ```{r markers_individualsamps_clusters}
# ## calulcate markers for individual sample clusters
#
# This is parallelized with Future as per Seurat recommendations.
# Future multisession mode.
#turn off future, it seems to break things
# future::plan('multisession', workers=workernum)
# save markers
# Output folder for the per-sample cluster markers; the sub-folder name
# records the number of PCs and the clustering resolution that were used.
indimarkerdir <- paste0(outdir_indi, '/individualsample_clustermarkers/')
sampmarkersave <- paste0(indimarkerdir, '/markers-PCs_', pcs_indi, '-res_', res_indi, '/')
dir.create(sampmarkersave, recursive = TRUE)
# Markers are always recomputed: reading back a previously written csv was
# turned off as too dangerous.
mlist_individualsamples_clusters <- lapply(sample_metadata$Code, function(code) {
  # positive markers of every cluster in this sample
  markers <- FindAllMarkers(sobjlist[[code]], only.pos = TRUE, verbose = verbose)
  # as of Nov 9 2023 (Seurat v5): add score to markers,
  # i.e. (pct.1 - pct.2) * avg_log2FC
  markers$score <- (markers$pct.1 - markers$pct.2) * markers$avg_log2FC
  write.csv(markers, paste0(sampmarkersave, code, '_clustermarkers.csv'),
            quote = FALSE, row.names = FALSE)
  markers
})
names(mlist_individualsamples_clusters) <- sample_metadata$Code
# future::plan(strategy = 'sequential')
# Emit the markdown header and intro text of the label-transfer section, only
# when label transfer is enabled.
if(use_labeltransfer == T){
text <- '## Label transfer from reference scRNA-seq data
With a reference single-cell RNA-seq dataset, we use label transfer to infer which celltypes are present in the new data.
One way to do this is via "label transfer", an integration-based machine learning method for classification. We use the [Seurat method](https://www.cell.com/cell/fulltext/S0092-8674(19)30559-8) for label transfer here.
'
# cat() writes the text as-is so it reaches the report as markdown
# NOTE(review): presumably this chunk is knitted with results = 'asis' - confirm
cat(text)
}
With a reference single-cell RNA-seq dataset, we use label transfer to infer which celltypes are present in the new data.
One way to do this is via “label transfer”, an integration-based machine learning method for classification. We use the Seurat method for label transfer here.
# Label transfer from a reference dataset onto every per-sample object in sobjlist.
# Runs only when use_labeltransfer is TRUE. For each sample this adds:
#   - a "predictions" assay holding the per-celltype transfer scores,
#   - per-cell top calls (top_celltype_call_seurat, its score, and a copy
#     thresholded at 0.3),
#   - a per-cluster call (celltype_cluster_prediction),
# and writes ClusterCelltypeMapping.csv under
# <outdir_indi>/labeltransfer/<project.name>/.
if(use_labeltransfer == T){
labeltransfer_outdir_indi <- paste0(outdir_indi, '/labeltransfer/')
dir.create(labeltransfer_outdir_indi, recursive = T)
# the reference object is read from refdatapath; its Celltype metadata column is used as the label
reference <- readRDS(refdatapath)
### unfortunately, Seurat does not like underscores in feature names, so we need to replace
# underscores become dashes; levels are re-ordered by decreasing label frequency
if( any(grepl('_', reference$Celltype)) ){
ct <- reference$Celltype
ct <- as.character(ct)
ct <- gsub('_', '-', ct)
ct <- factor(ct, levels = names(sort(table(ct), decreasing = T)) )
reference$Celltype <- ct
}
# #pre-process the reference data
# # make sure this is done beforehand!!!
# reference <- SCTransform(reference, ncells = 3000, verbose = verbose) %>%
# RunPCA(verbose = verbose) %>%
# RunUMAP(dims = 1:30, verbose = verbose)
#
# reference$Celltype <- reference$subclass
#
# refdatapath = "data/vignette/allen_cortex_preproc.rds"
# saveRDS(reference, refdatapath)
### actually label-transfer ###
# each element of sobjlist is replaced by the same object with the label transfer results attached
sobjlist <- lapply(sobjlist, function(sobj){
if(verbose == T){ message('\nLabel transfer for: ',sobj@project.name, '\n') }
#get the integration score matrix
# if( SeuratLabelTransfer.normalization.method == 'auto' ){
#
#
# if('SCT' %in% names(reference@assays) ){
# SeuratLabelTransfer.normalization.method <- 'SCT'
# } else{
# SeuratLabelTransfer.normalization.method <- 'LogNormalize'
# }
#
#
# }
# the normalization method is hard-coded to SCT (the 'auto' detection above is disabled)
SeuratLabelTransfer.normalization.method = 'SCT'
anchors <- FindTransferAnchors(reference = reference, query = sobj, normalization.method = SeuratLabelTransfer.normalization.method, verbose = verbose)
# first TransferData call: prediction.assay = T, result stored as the "predictions" assay
predictions.assay <- TransferData(anchorset = anchors,
refdata = reference$Celltype,
prediction.assay = T,
weight.reduction = sobj[["pca"]], dims = 1:30, verbose = verbose)
sobj[["predictions"]] <- predictions.assay
#get the top calls for each cell
# second TransferData call: prediction.assay = F, used for predicted.id and prediction.score.max
topcalls <- TransferData(anchorset = anchors, refdata = reference$Celltype, prediction.assay = F,
weight.reduction = sobj[["pca"]], dims = 1:30, verbose = verbose)
# factor levels ordered by how many cells received each call
levs <- names( sort(table(topcalls$predicted.id), decreasing = T) )
sobj$top_celltype_call_seurat <- factor(topcalls$predicted.id, levels = levs)
sobj$top_celltype_call_seurat_score <- topcalls$prediction.score.max
#add thresholded score
# cells whose top score is below 0.3 get NA as their thresholded call
md <- sobj@meta.data
md$top_celltype_thresholded <- md$top_celltype_call_seurat
md[md$top_celltype_call_seurat_score < 0.3, 'top_celltype_thresholded'] <- NA
sobj$top_celltype_thresholded <- md$top_celltype_thresholded
### get cluster-wise labeltransfer max ###
#get predmat
predmat <- sobj@assays$predictions@data
#drop "max"
# removes the last row; this assumes the "max" row is always last -- TODO confirm
predmat <- predmat[1:(nrow(predmat)-1), ]
#for each cluster, get max
# i.e. the celltype with the highest mean prediction score over the cluster's cells
md <- sobj@meta.data
clustname <- 'seurat_clusters'
clustmaxlist <- lapply(levels(md[,clustname]), function(clust){
clustcells <- rownames( md[md[,clustname] == clust,] )
clustpred <- predmat[,colnames(predmat) %in% clustcells,drop=F]
clust_avgscores <- Matrix::rowMeans(clustpred)
maxscore <- clust_avgscores[which.max(clust_avgscores)]
data.frame(cluster = clust, max = names(maxscore), score = maxscore, row.names = NULL)
})
clustmaxdf <- dplyr::bind_rows(clustmaxlist)
#make a label
clustmaxdf$label <- paste0( clustmaxdf$max)
#uncertainty... if < 0.3, put as NA
# if between 0.3 - 0.5, put putative
clustmaxdf[clustmaxdf$score < 0.3,'label'] <- NA
clustmaxdf[clustmaxdf$score >= 0.3 & clustmaxdf$score < 0.5,'label'] <- paste0( clustmaxdf[clustmaxdf$score >= 0.3 & clustmaxdf$score < 0.5,'label'],
'_putative')
#add to metadata
# cluster levels are mapped onto the labels in clustmaxdf, which has one row per cluster level in the same order
sobj$celltype_cluster_prediction <- sobj$seurat_clusters
sobj$celltype_cluster_prediction <- plyr::mapvalues(sobj$celltype_cluster_prediction,
from = levels(sobj$celltype_cluster_prediction),
to = clustmaxdf$label)
#write out cluster-celltype mapping
# one sub-directory per sample, named after the object's project.name
labeltransfer_outdir_indi_code <- paste0(labeltransfer_outdir_indi, '/', sobj@project.name, '/')
dir.create(labeltransfer_outdir_indi_code)
clustcelltypemapfile <- paste0(labeltransfer_outdir_indi_code, '/ClusterCelltypeMapping.csv')
write.csv(x=clustmaxdf,
file = clustcelltypemapfile,
row.names = F, quote = F)
# return the annotated object
sobj
})
# m_reference <- readRDS(m_reference)
# actually we only need this later. read in at the right time. for now save path
m_reference_path <- m_reference
rm(reference) #for memory saving
invisible(gc(full = T, reset = F, verbose = F))
}
In each sample, we perform clustering and label transfer. Here we plot the clusters and cluster markers for each sample. This can help identify which cell types are present in each sample.
# Per-sample summary plots: cluster UMAP, cluster-marker heatmap and QC violins.
# Directory that will receive one PDF per sample.
dir.create(paste0(outdir_indi, '/individualsample_plots/'), recursive = TRUE)

# For every sample in sobjlist, build a list with:
#   d1_a - UMAP coloured by the clustering column 'SCT_snn_res.<res_indi>'
#   hm   - ComplexHeatmap of the top 5 markers per cluster (SCT scale.data)
#   d0   - violin plots of four QC metrics
# Plots are only constructed here; nothing is drawn or written in this step.
summaryplots_individualsamples <- lapply(sobjlist, function(sobj) {
  # set up title
  sampname <- sobj@project.name

  # get markers
  m <- mlist_individualsamples_clusters[[sampname]]

  # clusters plot
  # for auto plotting with manually set res, need to use paste here...
  plottingvar <- paste0('SCT_snn_res.', res_indi)

  # dimplot of clusters
  d1_a <- wrap_plots(
    DimPlot(sobj, group.by = plottingvar, label = TRUE, repel = TRUE)
  ) + plot_annotation(title = sampname, caption = 'Louvain Clusters plotted on UMAP')

  # ## 3d umap ##
  # embs <- as.data.frame(sobj@reductions$UMAP3D@cell.embeddings)
  # embs <- cbind(embs, sobj$seurat_clusters); colnames(embs)[4] <- 'seurat_clusters'
  # embs$cluster_color <- plyr::mapvalues(embs$seurat_clusters,
  # from = levels(embs$seurat_clusters),
  # to = scales::hue_pal()(length(levels(embs$seurat_clusters))))
  # #add labels...
  # labdf <- aggregate(cbind(UMAP3D_1, UMAP3D_2, UMAP3D_3) ~ seurat_clusters, embs, median)
  # labdf$cluster_color <- levels(embs$cluster_color)
  #
  # #store inrgedients, run rgl when its time to print
  # three_d_umap_ingrdients <- list(embs = embs, labdf=labdf)

  # qc plots, do it with patchwork
  d0 <- wrap_plots(ncol = 2, list(
    VlnPlot(sobj, 'nCount_RNA', pt.size = 0.1) + NoLegend(),
    VlnPlot(sobj, 'nFeature_RNA', pt.size = 0.1) + NoLegend(),
    VlnPlot(sobj, 'percent.mito', pt.size = 0.1) + NoLegend(),
    VlnPlot(sobj, 'percent.hemoglobin', pt.size = 0.1) + NoLegend()
  )) + plot_annotation(title = sampname)

  # cluster markers: top n per cluster, ranked by the 'score' column of m
  DefaultAssay(sobj) <- 'SCT'
  n <- 5
  top <- m %>% group_by(cluster) %>% top_n(n = n, wt = score)
  genes <- top$gene

  # make sure genes are in scale.data
  if (any(!(genes %in% rownames(sobj@assays$SCT@scale.data)))) {
    # try getresidual...
    missinggenes <- genes[!(genes %in% rownames(sobj@assays$SCT@scale.data))]
    sobj <- GetResidual(sobj, missinggenes, na.rm = FALSE, replace.value = TRUE)

    # it can be complicated doing this after integration, some genes are NAs...
    # rows with any NA are dropped from scale.data and from the marker table
    scgem <- sobj@assays$SCT@scale.data
    if (any(!complete.cases(scgem))) {
      scgem <- scgem[complete.cases(scgem), ]
      top <- top[top$gene %in% rownames(scgem), ]
      sobj@assays$SCT@scale.data <- scgem
    }
    rm(scgem)
  }

  # prep heatmap: rows = marker genes in the order of 'top'
  top <- top[top$gene %in% rownames(sobj), ]
  gem <- sobj@assays$SCT@scale.data
  gem <- gem[match(top$gene, rownames(gem)), ]

  # annot for clusters
  # first order gem by cluster...
  md <- sobj@meta.data
  md <- md[order(md$seurat_clusters), ]
  gem <- gem[, match(rownames(md), colnames(gem))]
  clust_bc <- setNames(md$seurat_clusters,
                       nm = colnames(gem))
  col_clust <- setNames(scales::hue_pal()(length(levels(sobj$seurat_clusters))),
                        nm = levels(sobj$seurat_clusters))
  ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc,
                                                col = list(Cluster = col_clust),
                                                show_legend = FALSE)

  # annot for markers; row colours reuse the cluster palette
  top$cluster <- factor(top$cluster, levels = unique(top$cluster))
  # top <- top[order(top$cluster),]
  gem <- gem[match(top$gene, rownames(gem)), ]
  ct_gene <- setNames(top$cluster,
                      nm = top$gene)
  col_gene <- col_clust
  col_gene <- col_gene[names(col_gene) %in% top$cluster]
  ha_genes <- ComplexHeatmap::rowAnnotation(Cluster = ct_gene,
                                            col = list(Cluster = col_gene),
                                            show_annotation_name = FALSE)

  # restrict range to [-5, 5]
  # NOTE: the space in `gem < -5` is essential. `gem<-5` parses as the
  # assignment `gem <- 5`, which overwrote one matrix element with -5 and
  # left low values unclipped.
  gem[gem > 5] <- 5
  gem[gem < -5] <- -5

  # actual heatmap
  hm <- ComplexHeatmap::Heatmap(gem,
                                # column_title = 'Integrated clusters',
                                column_labels = rep('', ncol(gem)),
                                row_names_gp = grid::gpar(fontsize = 5),
                                column_split = md$seurat_clusters,
                                row_split = top$cluster,
                                row_title_gp = grid::gpar(fontsize = 5),
                                row_gap = unit(0.8, "mm"),
                                column_gap = unit(0.8, "mm"),
                                row_title_rot = 0,
                                column_title_rot = 45,
                                column_title_gp = grid::gpar(fontsize = 7),
                                name = 'Scaled\nExpression',
                                cluster_columns = FALSE,
                                cluster_rows = FALSE,
                                top_annotation = ha_clust,
                                left_annotation = ha_genes,
                                use_raster = FALSE)
  # invisible(hm <- draw(hm, column_title = sampname))

  list(
    d1_a = d1_a,
    # three_d_umap_ingrdients = three_d_umap_ingrdients,
    hm = hm,
    d0 = d0
  )
})
# Label every entry of the plot list with its sample's project name.
names(summaryplots_individualsamples) <- sapply(
  sobjlist,
  function(one_sample) {
    one_sample@project.name
  },
  simplify = TRUE
)

# Write one PDF per sample, pages in this order: cluster UMAP, marker heatmap,
# QC violins. Each call returns the value of dev.off().
lapply(names(summaryplots_individualsamples), function(sampname) {
  pdf_path <- paste0(outdir_indi, '/individualsample_plots/', sampname, '.pdf')
  sample_plots <- summaryplots_individualsamples[[sampname]]
  pdf(pdf_path, width = 9, height = 9)
  print(sample_plots$d1_a)
  print(sample_plots$hm)
  print(sample_plots$d0)
  dev.off()
})
# Label-transfer plots for every sample; runs only when use_labeltransfer is TRUE.
# Builds ctplots_individualsamples (one list of plots per sample, named by
# project.name) and writes LabelTransferSummaryPlots.pdf into each sample's
# label-transfer output directory.
if (use_labeltransfer == TRUE) {

  ## plots of cell type predictions from label transfer
  ctplots_individualsamples <- lapply(sobjlist, function(sobj) {

    # set up title
    sampname <- sobj@project.name

    # clusters plot
    # for auto plotting with manually set res, need to use paste here...
    plottingvar <- paste0('SCT_snn_res.', res_indi)

    # dimplot of clusters
    d1_a <- wrap_plots(
      DimPlot(sobj, group.by = plottingvar, label = TRUE, repel = TRUE)
    ) + plot_annotation(title = sampname, caption = 'Louvain Clusters plotted on UMAP')

    ### prep the Celltype Plots ("ctplots")
    # violinplots of all celltypes
    # get the summed prediction score of each celltype, dropping the last row
    rs <- Matrix::rowSums(sobj@assays$predictions@data)
    rs <- head(rs, -1)
    # keep only celltypes with a non-zero total, highest first
    rs <- sort(rs[rs > 0], decreasing = TRUE)
    ct_in <- names(rs)

    # plots of all celltypes in data...
    DefaultAssay(sobj) <- 'predictions'
    ct_plots <- lapply(setNames(ct_in, ct_in), function(ct) {
      # FeaturePlot(sobj, features = ct) + ggtitle(sampname)
      ct_plot <- VlnPlot(sobj, ct) + NoLegend() + ylab('Prediction score') +
        plot_annotation(title = sampname)
      return(ct_plot)
    })
    DefaultAssay(sobj) <- 'SCT'

    # heatmap of the prediction scores per cluster
    # use scaled values, comparable between clusters
    # get avgs
    avgs <- AverageExpression(sobj, assays = 'predictions', return.seurat = FALSE)
    # remove max (last row)
    avgs <- head(as.data.frame(avgs), -1)
    # remove empty prediction rows with all 0s
    avgs <- avgs[Matrix::rowSums(avgs) > 0, ]
    # make column titles nicer
    colnames(avgs) <- gsub('predictions.', 'cluster_', colnames(avgs))
    # format as numeric matrix
    avgs <- as.matrix(avgs)
    ### scale --> this emphasizes diffs between clusters, seems to put related cell types together
    avgs <- t(scale(t(avgs)))
    # middle value for the colour scale (the mean of the scaled matrix)
    medval <- mean(avgs)

    # plot it; the colour mapping was previously passed positionally
    hm_ctscores <- ComplexHeatmap::Heatmap(
      avgs,
      name = 'Scaled\nmean prediction\nscores per cluster',
      column_title = sampname,
      rect_gp = grid::gpar(col = "white", lwd = 0.5),
      col = circlize::colorRamp2(c(min(avgs), medval, max(avgs)), c("blue", "white", "red"))
    )

    # for dotplot, add dendrogram and row label order
    # it may throw a warning about drawing plot first etc,
    # shouldn't be an issue if we set seed, which we did
    suppressWarnings(
      dend <- row_dend(hm_ctscores)
    )
    # get ordered row labels for dotplot and other plots
    suppressWarnings(
      ct_ordered <- rownames(hm_ctscores@matrix)[ComplexHeatmap::row_order(hm_ctscores)]
    )
    nicedend <- ggdendro::ggdendrogram(rev(dend), rotate = TRUE) +
      scale_y_reverse(expand = c(0.05, 0)) +
      theme(axis.text.y = element_blank(),
            axis.text.x = element_blank())
    dp_ctscores <- DotPlot(sobj, assay = 'predictions', rev(ct_ordered)) +
      coord_flip() +
      theme(axis.title.y = element_blank(),
            axis.text.y = element_text(hjust = 0),
            axis.text.x = element_text(size = 5)) +
      scale_color_gradient2(low = 'blue', high = 'red', mid = 'grey') +
      xlab(label = 'Cluster') +
      guides(color = guide_colorbar(title = "Scaled Average\nPrediction Score"))
    dp_ctscores <- patchwork::wrap_plots(list(nicedend, dp_ctscores), widths = c(0.2, 1))

    # heatmap of reference markers
    m_reference <- readRDS(m_reference_path)
    # make sure to remove underscores, since seurat doesn't like it in label transfer feature names
    m_ref_small <- m_reference
    lt_ref_levs <- levels(m_ref_small$cluster)
    lt_ref_levs <- gsub('_', '-', lt_ref_levs)
    m_ref_small$cluster <- plyr::mapvalues(m_ref_small$cluster, from = levels(m_ref_small$cluster), lt_ref_levs)
    # keep markers present in this object and belonging to celltypes with non-zero score
    m_ref_small <- m_ref_small[m_ref_small$gene %in% rownames(sobj), ]
    m_ref_small <- m_ref_small[m_ref_small$cluster %in% ct_in, ]
    n <- 5
    top <- m_ref_small %>% group_by(cluster) %>% top_n(n = n, wt = avg_log2FC)
    genes <- top$gene

    # make sure genes are in scale.data
    if (any(!(genes %in% rownames(sobj@assays$SCT@scale.data)))) {
      sobj <- GetResidual(sobj, genes)
    }

    # prep heatmap
    top <- top[top$gene %in% rownames(sobj), ]
    gem <- sobj@assays$SCT@scale.data
    gem <- gem[match(top$gene, rownames(gem)), ]

    # annot for clusters
    # first order gem by cluster...
    md <- sobj@meta.data
    md <- md[order(md$seurat_clusters), ]
    gem <- gem[, match(rownames(md), colnames(gem))]
    clust_bc <- setNames(md$seurat_clusters,
                         nm = colnames(gem))
    col_clust <- setNames(scales::hue_pal()(length(levels(sobj$seurat_clusters))),
                          nm = levels(sobj$seurat_clusters))
    ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc,
                                                  col = list(Cluster = col_clust),
                                                  show_legend = FALSE)

    # annot for markers
    # set genes according to ct_ordered
    top$cluster <- factor(top$cluster, levels = ct_ordered)
    top <- top[order(top$cluster), ]
    gem <- gem[match(top$gene, rownames(gem)), ]
    ct_gene <- setNames(top$cluster,
                        nm = top$gene)
    coul <- RColorBrewer::brewer.pal(8, "Set2")
    coul <- colorRampPalette(coul)(length(unique(top$cluster)))
    col_gene <- setNames(coul, nm = unique(top$cluster))
    ha_genes <- ComplexHeatmap::rowAnnotation(Celltype = ct_gene,
                                              col = list(Celltype = col_gene),
                                              show_annotation_name = FALSE)

    # restrict range to [-5, 5]
    # NOTE: the space in `gem < -5` is essential. `gem<-5` parses as the
    # assignment `gem <- 5`, which overwrote one matrix element with -5 and
    # left low values unclipped.
    gem[gem > 5] <- 5
    gem[gem < -5] <- -5

    # actual heatmap
    hm_refmarkers <- ComplexHeatmap::Heatmap(gem,
                                             # column_title = sampname,
                                             column_labels = rep('', ncol(gem)),
                                             row_names_gp = grid::gpar(fontsize = 5),
                                             column_split = md[, paste0('SCT_snn_res.', res_indi)],
                                             row_split = top$cluster,
                                             row_title_gp = grid::gpar(fontsize = 5),
                                             row_gap = unit(0.8, "mm"),
                                             column_gap = unit(0.8, "mm"),
                                             column_title_rot = 45,
                                             column_title_gp = grid::gpar(fontsize = 7),
                                             row_title_rot = 0,
                                             name = 'Scaled\nExpression',
                                             cluster_columns = FALSE,
                                             cluster_rows = FALSE,
                                             top_annotation = ha_clust,
                                             left_annotation = ha_genes,
                                             use_raster = FALSE)
    # hm_refmarkers <- draw(hm_refmarkers, column_title = sampname)

    # average the (clipped) matrix per cluster: one column per cluster level
    avgl <- lapply(levels(md$seurat_clusters), function(clust) {
      mdc <- md[md$seurat_clusters == clust, ]
      # drop = FALSE keeps a matrix even when the cluster holds a single cell,
      # otherwise rowMeans() would fail on a plain vector
      gemc <- gem[, colnames(gem) %in% rownames(mdc), drop = FALSE]
      avg <- matrix(rowMeans(gemc),
                    dimnames = list(rownames(gem), clust))
      avg
    })
    avg <- do.call('cbind', avgl)

    # need to re-prep column annot
    clust_bc <- factor(str_sort(colnames(avg), numeric = TRUE), levels = str_sort(colnames(avg), numeric = TRUE))
    col_clust <- setNames(scales::hue_pal()(length(levels(sobj$seurat_clusters))),
                          nm = levels(sobj$seurat_clusters))
    ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc,
                                                  col = list(Cluster = col_clust),
                                                  show_legend = FALSE)
    hm_refmarkers_avg <- ComplexHeatmap::Heatmap(avg,
                                                 # column_title = sampname,
                                                 row_names_gp = grid::gpar(fontsize = 6),
                                                 column_split = factor(str_sort(colnames(avg), numeric = TRUE), levels = str_sort(colnames(avg), numeric = TRUE)),
                                                 column_names_gp = grid::gpar(fontsize = 6),
                                                 column_title_rot = 45,
                                                 column_names_rot = 0,
                                                 row_split = top$cluster,
                                                 row_title_gp = grid::gpar(fontsize = 6),
                                                 row_gap = unit(0.5, "mm"),
                                                 column_gap = unit(0.5, "mm"),
                                                 row_title_rot = 0,
                                                 name = 'Scaled\nExpression',
                                                 cluster_columns = FALSE,
                                                 cluster_rows = FALSE,
                                                 top_annotation = ha_clust,
                                                 left_annotation = ha_genes,
                                                 use_raster = FALSE)
    # hm_refmarkers_avg <- draw(hm_refmarkers_avg, column_title = sampname)

    # top cell type plots
    plottingvar <- 'top_celltype_thresholded'
    d2_a <- wrap_plots(
      DimPlot(sobj, group.by = plottingvar, label = TRUE, repel = TRUE)
    ) + plot_annotation(title = sampname)
    d2_b <- wrap_plots(
      FeaturePlot(sobj, 'top_celltype_call_seurat_score')
    ) + plot_annotation(title = sampname)

    # alluvial plot, clusters to cell types
    md <- sobj@meta.data
    labelsdf <- md[, c('seurat_clusters', 'top_celltype_thresholded')]
    ap <- wrap_plots(
      alluvialplot(labelsdf, repel = TRUE, direction = 'y')
    ) + plot_annotation(title = sampname)

    ## read the cluster-celltype mapping back in from this sample's CSV
    labeltransfer_outdir_indi_code <- paste0(labeltransfer_outdir_indi, '/', sobj@project.name, '/')
    clustcelltypemapfile <- paste0(labeltransfer_outdir_indi_code, '/ClusterCelltypeMapping.csv')
    clustmaxdf <- read.csv(clustcelltypemapfile)

    # also plot the cluster celltype call on dimplot
    d3 <- DimPlot(sobj, group.by = 'celltype_cluster_prediction', label = TRUE, repel = TRUE)

    # using heatmap dendrogram, order the per-celltype violin plots
    suppressWarnings(
      ct_ordered <- rownames(hm_ctscores@matrix)[ComplexHeatmap::row_order(hm_ctscores)]
    )
    ct_plots <- ct_plots[ct_ordered]

    list(d1_a = d1_a,
         hm_ctscores = hm_ctscores,
         dp_ctscores = dp_ctscores,
         hm_refmarkers = hm_refmarkers,
         hm_refmarkers_avg = hm_refmarkers_avg,
         d2_a = d2_a,
         d2_b = d2_b,
         ap = ap,
         d3 = d3,
         clustmaxdf = clustmaxdf,
         ct_plots = ct_plots
    )
  })

  # name them with the code names
  names(ctplots_individualsamples) <- sapply(sobjlist, function(sobj) { sobj@project.name }, simplify = TRUE)

  # save as PDFs, one per sample, in the sample's label-transfer directory
  lapply(names(ctplots_individualsamples), function(sampname) {
    labeltransfer_outdir_indi_code <- paste0(labeltransfer_outdir_indi, '/', sampname, '/')
    pdf(paste0(labeltransfer_outdir_indi_code, '/LabelTransferSummaryPlots.pdf'), width = 9, height = 9)

    ## print each but make sure table gets printed as a pdftable ##
    ctplots_thissamp <- ctplots_individualsamples[[sampname]]
    for (i in seq_along(ctplots_thissamp)) {
      if (names(ctplots_thissamp)[i] == 'clustmaxdf') {
        print(scDAPP::pdftable(ctplots_thissamp[[i]], title = 'Cluster-Celltype Mapping'))
      } else {
        print(ctplots_thissamp[[i]])
      }
    }
    dev.off()
    return(sampname)
  })
}
# Print the per-sample summary plots to the report, emitting a markdown
# section header per sample and a sub-header per plot via cat().
# do this in a way that creates a section for each sample...
# https://stackoverflow.com/questions/36674824/use-loop-to-generate-section-of-text-in-rmarkdown
template <- "
## Sample %s
" # don't forget the newline
# seq_along() instead of 1:length(): with no samples the loop body is skipped
# rather than run for indices 1 and 0.
for (i in seq_along(names(summaryplots_individualsamples))) {
  sampname <- names(summaryplots_individualsamples)[i]
  cat(sprintf(template, sampname))
  sampsumplots <- summaryplots_individualsamples[[i]]

  # plot each plot one at a time with appropriate label
  plotlab <- "
### UMAP of clusters
Here we plot a UMAP of the Louvain clusters for this sample.
"
  cat(plotlab)
  print(sampsumplots$d1_a)

  # ## 3d umap ##
  #
  #
  # plotlab <- "
  #
  #
  # ### 3D UMAP of clusters
  #
  # Here we plot a 3D UMAP of the Louvain clusters for this sample.
  #
  #
  # "
  #
  # cat( plotlab )
  # print( sampsumplots$d1_a )
  #
  # embs <- as.data.frame(sobj@reductions$UMAP3D@cell.embeddings)
  # embs <- cbind(embs, sobj$seurat_clusters); colnames(embs)[4] <- 'seurat_clusters'
  # embs$cluster_color <- plyr::mapvalues(embs$seurat_clusters,
  # from = levels(embs$seurat_clusters),
  # to = scales::hue_pal()(length(levels(embs$seurat_clusters))))
  # #add labels...
  # labdf <- aggregate(cbind(UMAP3D_1, UMAP3D_2, UMAP3D_3) ~ seurat_clusters, embs, median)
  # labdf$cluster_color <- levels(embs$cluster_color)
  #
  # #store inrgedients, run rgl when its time to print
  # three_d_umap_ingrdients <- summaryplots_individualsamples$three_d_umap_ingrdients
  # embs <- three_d_umap_ingrdients$embs
  # labdf <- three_d_umap_ingrdients$labdf
  #
  #
  # UMAP3D_1 = embs$UMAP3D_1; UMAP3D_2 = embs$UMAP3D_2; UMAP3D_3 = embs$UMAP3D_3
  # rgl::plot3d(UMAP3D_1, UMAP3D_2, UMAP3D_3, col = embs$cluster_color)
  # rgl::text3d(labdf$UMAP3D_1, labdf$UMAP3D_2, labdf$UMAP3D_3, texts = labdf$seurat_clusters,
  # adj = c(2,2,2))
  #
  #

  plotlab <- "
### Heatmap of cluster markers
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
"
  cat(plotlab)
  print(sampsumplots$hm)

  plotlab <- "
### QC plots
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
"
  cat(plotlab)
  print(sampsumplots$d0)
}
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
Here we plot a UMAP of the Louvain clusters for this sample.
Here we plot the top 5 marker genes of each cluster as identified by wilcoxon test. Sometimes, clusters may share the same markers, which may indicate the clusters are similar, such as subtypes of the same cell type (for example, CD4 vs CD8 T cells).
Here we plot some QC information, including the number of UMIs (nCount_RNA) and the number of unique genes (nFeature_RNA). Very low values may indicate a low number of cells in a specific area or low quality. If the clustering and markers look very strange, the quality metrics may help diagnose issues by indicating a cluster of poor quality cells.
# When label transfer is enabled, emit the markdown header and introductory
# text for the "Individual sample label transfer cell type predictions"
# section. The text is written to the output with cat().
if(use_labeltransfer == T){
text <- '
# Individual sample label transfer cell type predictions
For each sample we perform label transfer from a reference single-cell RNA-seq dataset. Here, we plot the results of the label transfer. Using a prior single-cell RNAseq dataset, we learn which cell types are present in the current data.
Using label transfer, we can get a score for each cell type in the reference dataset. A high score indicates a strong and confident match, while a low score indicates a match of low confidence and may represent a cell type that was not captured in the reference.
Note that we apply a label transfer score cutoff of 0.3. If the cells are below this, they are marked as NA. These may represent cells missing from the reference.
It can be complex and time-consuming to analyze each sample individually before integration; however, the reward is often worth the effort. Characterizing each sample allows us to assess the presence of cell types in each sample carefully. For example, we may find that all the samples contain neuron cells, while only one sample may contain macrophage cells, but other samples do not. This is useful for interpreting the downstream results of differential expression across samples; for example, it would be wise to focus on a comparative analysis of the cell types in all samples (neurons), and simply note that macrophages are present in some but missing in other samples.
Additionally, it is also useful to observe how the cell types are related to one another in the individual samples. We may observe that immune cell types like T cells and B cells may cluster closely together, while very different cell types, such as neurons and endothelial cells, may cluster far apart. Identifying such clustering patterns in individual samples may help identify cases of "over-integration" or other issues.
'
cat(text)
}
For each sample we perform label transfer from a reference single-cell RNA-seq dataset. Here, we plot the results of the label transfer. Using a prior single-cell RNAseq dataset, we learn which cell types are present in the current data.
Using label transfer, we can get a score for each cell type in the reference dataset. A high score indicates a strong and confident match, while a low score indicates a match of low confidence and may represent a cell type that was not captured in the reference.
Note that we apply a label transfer score cutoff of 0.3. Cells whose top score falls below this cutoff are marked as NA; these may represent cell types that are missing from the reference.
It can be complex and time-consuming to analyze each sample individually before integration; however, the reward is often worth the effort. Characterizing each sample allows us to assess the presence of cell types in each sample carefully. For example, we may find that all the samples contain neuron cells, while only one sample may contain macrophage cells, but other samples do not. This is useful for interpreting the downstream results of differential expression across samples; for example, it would be wise to focus on a comparative analysis of the cell types in all samples (neurons), and simply note that macrophages are present in some but missing in other samples.
Additionally, it is also useful to observe how the cell types are related to one another in the individual samples. We may observe that immune cell types like T cells and B cells may cluster closely together, while very different cell types, such as neurons and endothelial cells, may cluster far apart. Identifying such clustering patterns in individual samples may help identify cases of “over-integration” or other issues.
# Per-sample label-transfer report.
# For every sample in `ctplots_individualsamples` this prints a markdown
# section header followed by the pre-computed plots / table stored for that
# sample. Headers are written with cat(), so the chunk presumably runs with
# results = 'asis' -- confirm in the chunk options.
# Both loops use seq_along() so that an empty list produces no output instead
# of iterating over c(1, 0).
if (use_labeltransfer == TRUE) {
  #print to report
  # do this in a way that creates a section for each sample...
  # https://stackoverflow.com/questions/36674824/use-loop-to-generate-section-of-text-in-rmarkdown
  # header templates; %s is filled in with sprintf()
  template_samples <- "
## Sample %s
"
  template_plots <- "
### %s
"
  template_celltypes <- "
#### %s
"
  sample_names <- names(ctplots_individualsamples)
  for (i in seq_along(sample_names)) {
    sampname <- sample_names[i]
    cat(sprintf(template_samples, sampname))
    # all plots / tables prepared for this sample
    ctplotlist <- ctplots_individualsamples[[i]]
    #print them, with titles
    plotlab <- "
### Heatmap of label transfer cell type scores
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
"
    cat(plotlab)
    print(ctplotlist$hm_ctscores)
    plotlab <- "
### Dotplot of label transfer cell type scores
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
"
    cat(plotlab)
    print(ctplotlist$dp_ctscores)
    plotlab <- "
### Heatmap of reference cell type markers
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
"
    cat(plotlab)
    print(ctplotlist$hm_refmarkers)
    plotlab <- "
### Heatmap of reference cell type markers averaged across clusters
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
"
    cat(plotlab)
    print(ctplotlist$hm_refmarkers_avg)
    plotlab <- "
### UMAP of top scoring celltypes
The top scoring celltype for each cell is plotted on the UMAP.
"
    cat(plotlab)
    print(ctplotlist$d2_a)
    plotlab <- "
### UMAP of label transfer scores
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
"
    cat(plotlab)
    print(ctplotlist$d2_b)
    plotlab <- "
### Alluvial plot mapping between Louvain clusters to top celltypes
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster.
The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
"
    cat(plotlab)
    print(ctplotlist$ap)
    plotlab <- '
### Table showing cluster-celltype mapping
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset.
Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix "_putative" to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independnetly of the label transfer results.
'
    cat(plotlab)
    # cluster -> cell type table, with the score rounded for display only
    clustmaxprint <- ctplotlist$clustmaxdf
    clustmaxprint$score <- round(clustmaxprint$score, digits = 5)
    print(knitr::kable(clustmaxprint))
    plotlab <- "
### UMAP of cluster-celltype mapping
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
"
    cat(plotlab)
    print(ctplotlist$d3)
    plotlab <- "
### Violin plots for each cell type
Here we plot a violin plot for each cell type to analyze the label transfer scores type in detail.
"
    cat(plotlab)
    # one sub-section per cell type, titled with the name of the list element
    for (j in seq_along(ctplotlist$ct_plots)) {
      ctname <- names(ctplotlist$ct_plots[j])
      cat(sprintf(template_celltypes, ctname))
      print(ctplotlist$ct_plots[[j]])
    }
  }
}
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | NK | 0.96341 | NK |
| 1 | MNP | 1.00000 | MNP |
| 2 | T | 0.99916 | T |
| 3 | B | 0.97787 | B |
| 4 | T | 0.91109 | T |
| 5 | B | 1.00000 | B |
| 6 | MNP | 1.00000 | MNP |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | NK | 0.97930 | NK |
| 1 | MNP | 1.00000 | MNP |
| 2 | T | 1.00000 | T |
| 3 | B | 0.97849 | B |
| 4 | B | 0.99921 | B |
| 5 | T | 0.99633 | T |
| 6 | MNP | 1.00000 | MNP |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | B | 0.98125 | B |
| 1 | NK | 0.98922 | NK |
| 2 | MNP | 0.99991 | MNP |
| 3 | T | 0.99811 | T |
| 4 | T | 0.98932 | T |
| 5 | NK | 0.99483 | NK |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | NK | 0.88159 | NK |
| 1 | MNP | 1.00000 | MNP |
| 2 | B | 0.96980 | B |
| 3 | T | 0.99507 | T |
| 4 | T | 0.93617 | T |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | T | 0.99916 | T |
| 1 | MNP | 1.00000 | MNP |
| 2 | B | 0.99378 | B |
| 3 | NK | 0.86349 | NK |
| 4 | B | 0.99557 | B |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above.
However, the dotplot also includes information about how many cells in the cluster express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A blue dot indicates relatively low score. A small dot indicates few cells express the score.
Here we plot the top 5 reference celltype markers as sorted by average log2 fold change. The markers are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type. For example, a cluster with high T cell score should highly express T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype for each cell is plotted on the UMAP.
The scores of the cell types are plotted on the UMAP. This shows how confident the prediction is. If the score is low, it may be that the cells present in this dataset are missing from the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster. The top-scoring cell type is plotted, but some cells and clusters may contain a mix of cell types.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 0 | B | 1.00000 | B |
| 1 | MNP | 1.00000 | MNP |
| 2 | T | 0.99799 | T |
| 3 | NK | 0.96310 | NK |
| 4 | NK | 0.63160 | NK |
| 5 | T | 0.98930 | T |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a violin plot for each cell type to analyze that cell type's label transfer scores in detail.
Here, we combine all of the samples into a single unified dataset.
“Integration” refers to the process of combining the data from individual samples while applying batch correction. Typically, library prep is done one sample at a time, which may introduce bias due to technical artifacts associated with the processing of that sample, e.g., the particular reagent lot used, slightly longer incubation periods, slightly more reads sequenced, etc. To combine samples, we thus apply special batch-correction procedures. The method used here relies on Reference Principal Component Integration, implemented in the RISC package (Liu, Zheng et al., Nat Biotechnol 2021). This approach exploits the fact that cells of the same type typically have similar transcriptomic patterns, and then tries to match those patterns across samples.
Ultimately, the goal of integration is to minimize technical noise and maximize shared biological signals, the strongest of which is typically cell type.
Integration can be contrasted with “concatenation,” which involves simply merging the samples without any batch correction. If there are strong batch effects in the data, this method may result in clusters driven not by cell type but by the sample of origin (i.e., cluster 1 is all cells from sample 1, cluster 2 is all cells from sample 2).
# Annotate every per-sample object in `sobjlist` before integration.
# Three passes are made; each one rebuilds `sobjlist` and re-applies the
# sample codes as list names:
#   1. sample identity  -> orig.ident, Code, Sample
#   2. experimental arm -> Condition
#   3. cell barcode     -> leading 'Barcode' column in the metadata table
codes <- sample_metadata$Code

# pass 1: sample identity columns
sobjlist <- stats::setNames(
  lapply(codes, function(this_code) {
    # row(s) of the sample sheet describing this code
    sheet_row <- sample_metadata[sample_metadata$Code == this_code, ]
    obj <- sobjlist[[this_code]]
    obj$orig.ident <- this_code
    obj$Code <- this_code
    obj$Sample <- sheet_row$Sample
    obj
  }),
  codes
)

# pass 2: condition column, looked up in the sample sheet by code
sobjlist <- stats::setNames(
  lapply(names(sobjlist), function(list_name) {
    obj <- sobjlist[[list_name]]
    obj$Condition <- sample_metadata[sample_metadata$Code == list_name, "Condition"]
    obj
  }),
  codes
)

# pass 3: copy the metadata rownames into a first column called 'Barcode'
sobjlist <- stats::setNames(
  lapply(names(sobjlist), function(list_name) {
    obj <- sobjlist[[list_name]]
    cell_table <- obj@meta.data
    obj@meta.data <- cbind(Barcode = rownames(cell_table), cell_table)
    obj
  }),
  codes
)
# clean up env before doing this
# Drop the per-sample plotting / marker objects that are no longer needed and
# force a garbage collection.
# Only objects that currently exist are removed: `ctplotlist`, for example, is
# assigned only inside the label-transfer branch, and asking rm() for a missing
# (or repeated) name raises an "object not found" warning.
stale_objects <- c(
  'ctplotlist', 'ctplots_individualsamples', 'elbowplots', 'm_reference',
  'cutoffplots', 'cutoffs', 'sampsumplots',
  'mlist_individualsamples_clusters',
  'qc_vln_feature', 'qc_vln_umi',
  'summaryplots_individualsamples'
)
rm(list = intersect(stale_objects, ls(all.names = TRUE)))
rm(stale_objects)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
#### write every annotated object to disk, keep only its metadata in memory ####
# objects go to a hidden temp folder below the individual-sample output dir
tmpobjdir <- paste0(outdir_indi, '/.tmp_Seurat_objects/')
dir.create(tmpobjdir, recursive = TRUE)

# one metadata table per sample, named by sample code
mdlist <- stats::setNames(
  lapply(sample_metadata$Code, function(code) {
    obj <- sobjlist[[code]]
    saveRDS(obj, paste0(tmpobjdir, '/', code, '.rds'))
    obj@meta.data
  }),
  sample_metadata$Code
)

### qc purposes, read in --> keep commented
# sobjlist <- lapply(sample_metadata$Code, function(code){
#
# message(code)
# sobj <- readRDS( paste0(tmpobjdir, '/', code, '.rds') )
#
# sobj
#
# })
# names(sobjlist) <- sample_metadata$Code

# the in-memory objects are no longer needed: drop them and purge
rm(sobjlist)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
# Re-read the raw RNA count matrix of every sample and keep exactly the cells
# listed in that sample's metadata table (`mdlist[[code]]`), in metadata order.
#   input_seurat_obj == TRUE : counts are taken from <datadir>/<Sample>.rds
#   otherwise                : counts are read from the
#                              filtered_feature_bc_matrix.h5 found anywhere
#                              below <datadir>/<Sample>
# The result, `matlist`, is named by sample code.
# Failure modes that previously surfaced as cryptic subscript errors (no or
# several h5 files, a non-matrix return value, barcodes missing from the raw
# matrix) now stop with an explicit message.
matlist <- lapply(sample_metadata$Code, function(code) {
  #get md for this object
  md <- mdlist[[code]]
  samp <- sample_metadata[sample_metadata$Code == code, 'Sample']

  if (input_seurat_obj == TRUE) {
    #for HTO, we will assume saved objects
    sobjfile <- paste0(datadir, '/', samp, '.rds')
    sobj <- readRDS(sobjfile)
    #get RNA counts matrix
    # mat0 <- sobj@assays$RNA@counts
    mat0 <- GetAssayData(sobj, assay = 'RNA', layer = 'counts')
  } else {
    datafp <- paste0(datadir, '/', samp)
    # if on hpc, use below
    # datafp <- paste0(datadir, '/', samp, '/outs/')
    # for the dl data, we need to find the filepath
    h5_filename <- grep(pattern = 'filtered_feature_bc_matrix.h5',
                        list.files(datafp, recursive = TRUE, full.names = TRUE),
                        value = TRUE)
    if (length(h5_filename) != 1) {
      stop('Expected exactly one filtered_feature_bc_matrix.h5 under ', datafp,
           ' but found ', length(h5_filename),
           call. = FALSE)
    }
    #get RNA counts matrix
    mat0 <- Read10X_h5(h5_filename)
    # NOTE(review): Read10X_h5 can return a list of matrices for some inputs;
    # this pipeline only handles a single matrix, so fail clearly otherwise
    if (is.list(mat0)) {
      stop('Read10X_h5 returned a list of matrices for ', h5_filename,
           '; a single counts matrix is required',
           call. = FALSE)
    }
  }

  #filter using sobj metadata
  cell_idx <- match(rownames(md), colnames(mat0))
  if (anyNA(cell_idx)) {
    stop(sum(is.na(cell_idx)), ' barcode(s) of sample code ', code,
         ' are missing from the raw count matrix',
         call. = FALSE)
  }
  # drop = FALSE keeps a matrix even if a single cell is left
  mat0[, cell_idx, drop = FALSE]
})
names(matlist) <- sample_metadata$Code
# keep intersect genes; will only remove genes if aligned with different GTFs
# after this step every matrix has the same genes in the same row order
var0 <- Reduce(intersect, lapply(matlist, FUN = rownames))
matlist <- lapply(matlist, function(mat){ mat[match(var0, rownames(mat)), ] })
rm(var0)
#join matrices (cells of all samples side by side) and filter genes jointly
bigmat <- do.call(cbind, matlist)
#get num cells expressing
# `@i` holds the 0-based row index of every stored entry (assumes bigmat is a
# column-compressed sparse matrix such as dgCMatrix -- TODO confirm).
# nbins must be given: without it tabulate() stops at the highest row that has
# an entry, so the result would be shorter than nrow(bigmat) whenever the last
# genes have no entries, and the logical index below would be recycled onto
# the wrong genes.
num_nonzeros <- tabulate(bigmat@i + 1L, nbins = nrow(bigmat))
#get joint filtered genes as those exp by >= 3 cells
joint_filt_genes <- rownames(bigmat)[num_nonzeros >= 3]
# build one RISC object per sample from the jointly filtered count matrices #
risclist <- lapply(sample_metadata$Code, function(code) {
  # counts restricted to, and ordered like, the jointly filtered gene set
  counts <- matlist[[code]]
  counts <- counts[rownames(counts) %in% joint_filt_genes, ]
  counts <- counts[match(joint_filt_genes, rownames(counts)), ]

  # cell annotation: the sample metadata plus a leading `barcodes` column of the
  # form "<orig.ident>.<barcode>", where <barcode> is the metadata rowname with
  # everything from the first '-' onwards removed
  cell_info <- mdlist[[code]]
  bare_barcodes <- stringr::str_split_fixed(rownames(cell_info), '-', 2)[, 1]
  cell_info <- cbind(
    barcodes = paste0(cell_info$orig.ident, '.', bare_barcodes),
    cell_info
  )

  # gene annotation: a single Symbol column, indexed by gene name
  gene_info <- data.frame(Symbol = rownames(counts), row.names = rownames(counts))

  ### make sure to set is.filter = F or it will still use sample-specific filtering
  readsc(counts, cell_info, gene_info, is.filter = FALSE)
})
names(risclist) <- sample_metadata$Code
## persist the per-sample count matrices; they are added to the RNA assay later ##
outdir_int_objects <- paste0(outdir_int, '/data_objects/')
dir.create(outdir_int_objects, recursive = TRUE)
saveRDS(matlist, file = paste0(outdir_int_objects, '/.concatmatrix.rds'))

# the large intermediates are no longer needed: drop them and purge
rm(list = c('bigmat', 'num_nonzeros', 'joint_filt_genes', 'matlist', 'mdlist'))
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
#### per-sample RISC preprocessing ####
# ncore stays at 1 inside the function: parallelism happens across samples

# Run the three RISC preprocessing steps on one object and return the result.
#   obj0: a RISC object (as produced by readsc)
# Each step announces itself with message() so progress is visible in the log.
process0 <- function(obj0) {
  # filtering step with all thresholds switched off (is.filter = FALSE)
  message('scFilter')
  obj0 <- RISC::scFilter(
    obj0,
    min.UMI = 0, max.UMI = Inf,
    min.gene = 0, min.cell = 0,
    is.filter = FALSE
  )

  # normalization of the raw counts, single core
  message('scNormalize')
  obj0 <- RISC::scNormalize(obj0, ncore = 1)

  # dispersion / highly variable gene step
  message('scDisperse')
  obj0 <- RISC::scDisperse(obj0)

  obj0
}
### process each sample with RISC functions, in parallel
# One worker per requested core; each worker loads RISC and runs process0 on
# a single sample.
cl <- parallel::makeCluster(workernum)
doParallel::registerDoParallel(cl)
# finally= guarantees the worker processes are shut down even when a sample
# fails; previously an error left the cluster running.
risclist <- tryCatch(
  foreach(dat0 = risclist, .packages = c('RISC')) %dopar% {
    process0(dat0)
  },
  finally = parallel::stopCluster(cl)
)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
# foreach returns an unnamed list, so the sample codes are restored here
names(risclist) <- sample_metadata$Code
## variable genes - all genes are still used.
# gene symbols shared by every processed RISC object
var0 <- Reduce(intersect, lapply(risclist, function(obj) obj@rowdata$Symbol))

# InPlot is only built here so it can be shown later; drawing happens on a
# null pdf device so nothing is displayed prematurely.
pdf(NULL)
ip <- patchwork::wrap_plots(
  InPlot(risclist, var.gene = var0, Std.cut = 0.95, ncore = workernum)
)
dev.off()
### reference - either user-defined, or guess
if (!is.null(risc_reference)) {
  # A user-supplied reference may be given as either a Code or a Sample name;
  # what is needed downstream is its numeric row position in sample_metadata.
  # Exact matching is used. The previous grep() treated the name as a regular
  # expression (so it also hit longer names containing it), and the Sample
  # branch searched the Code column by mistake.
  ref <- match(risc_reference[1], sample_metadata$Code)
  if (is.na(ref)) {
    ref <- match(risc_reference[1], sample_metadata$Sample)
  }
  # fail with a clear message instead of leaving `ref` undefined
  if (is.na(ref)) {
    stop(
      "risc_reference '", risc_reference[1],
      "' matches neither a Code nor a Sample in sample_metadata",
      call. = FALSE
    )
  }
}
# The automated guess is always computed, even when a reference was provided;
# it only becomes the reference when none was given.
# InPlot does not itself suggest a sample, so a score is built from the number
# of clusters per sample, given that all samples were processed identically
# (same PCs and resolution).

# number of distinct clusters in each sample
numclusts <- sapply(risclist, function(obj) {
  length(unique(obj@coldata$seurat_clusters))
})

# cell-number weight: each sample's cell count relative to the largest sample
numcells_per_sample <- sapply(risclist, function(obj) nrow(obj@coldata))
numcells_per_sample <- numcells_per_sample / max(numcells_per_sample)

# weight the cluster counts by the cell-number score
numclusts <- numclusts * numcells_per_sample

# per sample: mean over clusters of the variance of the cluster-averaged
# logcount profile
pbvar <- sapply(risclist, function(obj) {
  cell_md <- obj@coldata
  # per-cluster pseudobulk (summed per the original note -- TODO confirm)
  cluster_sums <- pseudobulk(
    obj = obj@assay$logcount,
    metadata = cell_md,
    grouping_colname_in_md = 'seurat_clusters'
  )
  # turn sums into averages by dividing each column by its cluster's cell count
  cluster_means <- sweep(
    cluster_sums, 2, table(cell_md$seurat_clusters),
    FUN = '/'
  )
  mean(apply(cluster_means, 2, var))
})

# final score: weighted cluster count times mean cluster variance
refscore <- numclusts * pbvar

### try to divide by KS? ###

# position (and name) of the best-scoring sample
maxautoscore <- which.max(refscore)

# the autoselected sample is the reference only when none was provided
if (is.null(risc_reference)) {
  ref <- maxautoscore
}
# The reference must be the first list element, so move it to the front and
# keep the remaining samples in their original order. Positional indexing
# replaces the earlier 1:length() loop, whose name-keyed append would silently
# overwrite a sample if two list names were identical. When ref is already 1
# this leaves the list unchanged.
data0 <- c(risclist[ref], risclist[-ref])
rm(risclist)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
### actual integration ###
# "eigens" is the number of PCs handed to the integration and, further down,
# to the UMAP and the clustering
eigens <- pcs_int
# Integrate the list of RISC objects in data0 (arranged above so that the
# reference is the first element), using the genes in var0.
data0 = scMultiIntegrate(
objects = data0, eigens = eigens, add.Id = NULL, var.gene = var0,
# method = "RPCI",
align = 'OLS', npc = 50, adjust = TRUE,
ncore = workernum,
#do.fast = "AUTO"
)
# var0 is not used again after integration
rm(var0)
invisible(gc(full = T, reset = F, verbose = F))
# integrated UMAP, computed with the same number of components as the integration
data0 = scUMAP(data0, npc = eigens, use = "PLS")
#### integrated clustering ####
### AS OF JUL 5 2023 USE scDAPP::scCluster_louvain_res()
# The commented-out block below is the earlier approach (RISC scCluster with
# a fixed neighbor = 10, then relabelling clusters from biggest to smallest);
# it is kept for reference only.
# # integrated clustering
# # neighbor = 10 is default... maybe make it a variable too...?
# data0 <- scCluster(data0,
# slot = "cell.pls",
# method = 'louvain',
# npc = eigens,
# neighbor = 10
# )
#
# ### remap clust, biggest to smallest...
# #get risc clust
# rc <- data0@coldata$Cluster
#
# #sort by biggest to smallest
# bs <- sort(table(rc), decreasing = T)
# rc <- plyr::mapvalues(rc, from = names(bs), to = c(1:length(bs)) )
# rc <- factor(rc, levels = 1:length(bs))
#
# #set to object
# data0@coldata$Cluster <- rc
# rm(rc,bs)
# Cluster the integrated object on the "cell.pls" slot, with the resolution
# (res_int) and neighbor count (RISC_louvain_neighbors) taken from variables
# set elsewhere.
data0 = scDAPP::scCluster_louvain_res(object = data0,
slot = "cell.pls",
# method = 'louvain',
npc = eigens,
resolution = res_int,
neighbor = RISC_louvain_neighbors,
)
# save the integrated RISC object to the data_objects directory
outdir_int_objects <- paste0(outdir_int, '/data_objects/')
dir.create(outdir_int_objects)
saveRDS(data0, paste0(outdir_int_objects, '/RISC-object_integrated.rds'))
# Cluster markers are computed with Seurat further on. RISC AllMarker() was
# the earlier route and took about an hour even with parallelization:
# riscmarkertime <- proc.time()
# risc_clustermarkers = AllMarker(data0, ncore = workernum)
# riscmarkertime <- proc.time() - riscmarkertime

# column label for the RISC clustering, encoding the PC number and resolution
clustname <- risc_clust_lab <- paste0(
  'RISC_Louvain_npc', pcs_int, '_res', res_int
)

# pull the pieces out of the RISC object: per-sample logcount matrices joined
# column-wise, cell metadata, and the UMAP / PLS embeddings
umap <- data0@DimReduction$cell.umap
pca <- data0@DimReduction$cell.pls
md <- data0@coldata
mat <- do.call(cbind, data0@assay$logcount)

# the last metadata column is renamed to the RISC cluster label
# (presumably the "Cluster" column added by the clustering step -- verify)
colnames(md)[ncol(md)] <- risc_clust_lab

# Seurat object with the RISC values stored in the data slot of a 'RISC' assay
sobjint <- CreateSeuratObject(
  counts = CreateAssayObject(data = mat),
  meta.data = md,
  project = 'Integrated',
  assay = 'RISC'
)

# attach the embeddings; the PLS embedding is stored under the name 'pca'
sobjint[['umap']] <- CreateDimReducObject(umap, assay = 'RISC', key = 'UMAP_')
sobjint[['pca']] <- CreateDimReducObject(pca, assay = 'RISC', key = 'PCA_')

# free the intermediates, including the RISC object itself
rm(list = c('umap', 'pca', 'mat', 'md', 'data0'))
invisible(gc(verbose = FALSE, reset = FALSE, full = TRUE))
### find markers ###
# use the RISC clustering both as the active identity and as seurat_clusters
sobjint <- SetIdent(sobjint, value = sobjint@meta.data[,risc_clust_lab])
sobjint$seurat_clusters <- sobjint@meta.data[,risc_clust_lab]
# save the integrated object (it is re-read and re-saved further down)
saveRDS(sobjint, paste0(outdir_int_objects, '/Seurat-object_integrated.rds'))
# output location for the marker table; the file name encodes PCs and resolution
intmarkersdir <- paste0(outdir_int, '/markergenes_intclusters/')
dir.create(intmarkersdir, recursive = T)
intmarkersfile <- paste0(intmarkersdir, '/intmarkers-npc', pcs_int, '-res', res_int, '.csv')
#get markers, parallelized
# TURN OFF PAR FOR NOW, it seems to break things
# ALSO MAKE SURE NOT TO DO READ IN THING IN CASE OF CLUSTERING DIFFS
# (the commented-out block below is that disabled variant: parallel via
# future, and re-using an existing marker file when present)
# future::plan('multisession', workers=workernum)
# if(!file.exists(intmarkersfile)){
#
# #Seurat clusters
# seuratmarkertime <- proc.time()
# m_integrated_clusters <- FindAllMarkers(sobjint,
# only.pos = T)
# seuratmarkertime <- proc.time() - seuratmarkertime
#
#
# # future::plan(strategy = 'sequential')
#
# # takes about an hour, future parallel is not stable and cause memory crash
#
#
# write.csv(m_integrated_clusters, intmarkersfile, quote = F, row.names = F)
#
#
# } else{
# m_integrated_clusters <- read.csv(intmarkersfile)
#
# }
# positive markers only, always recomputed rather than read from disk
m_integrated_clusters <- FindAllMarkers(sobjint,
only.pos = T)
# as of Nov 9 2023 (Seurat v5): add score to markers
# score = (pct.1 - pct.2) * avg_log2FC; used later to pick top genes per cluster
m_integrated_clusters$score <- (m_integrated_clusters$pct.1 - m_integrated_clusters$pct.2) * m_integrated_clusters$avg_log2FC
write.csv(m_integrated_clusters, intmarkersfile, quote = F, row.names = F)
# Keep only the integrated metadata in memory; the integrated object itself is
# dropped for now so each per-sample object can be read and saved, and is
# read back in afterwards.
intmd <- sobjint@meta.data
rm(sobjint)
invisible(gc(verbose = FALSE, reset = FALSE, full = TRUE))

## the predictions assay of the individual objects is needed as well,
## but only when label transfer is on
# output directory for the per-sample objects (should already exist)
outdir_indi_seuratobjs <- paste0(outdir_indi, '/processed_Seurat_objects/')
dir.create(outdir_indi_seuratobjs, recursive = TRUE)

# For every sample: add the integrated cluster labels to its object, save it,
# and hand back its prediction matrix (or just the code without label transfer).
predictionmats <- lapply(sample_metadata$Code, function(code) {
  # per-sample object from the temp dir
  sample_obj <- readRDS(paste0(tmpobjdir, '/', code, '.rds'))

  # integrated metadata rows of this sample, put in the order of the
  # sample's own cells
  int_rows <- intmd[intmd$Code == code, ]
  int_rows <- int_rows[match(sample_obj@meta.data$Barcode, int_rows$scBarcode), ]

  # copy the integrated cluster column (clustname is defined earlier)
  sample_obj@meta.data[, clustname] <- int_rows[, clustname]

  # write the annotated sample object to the processed-objects directory
  saveRDS(
    sample_obj,
    paste0(outdir_indi_seuratobjs, '/SeuratObject-', code, '.rds')
  )

  if (!(use_labeltransfer == TRUE)) {
    return(code)
  }

  # prediction scores, with columns renamed to the integrated barcodes
  scores <- sample_obj@assays$predictions@data
  colnames(scores) <- int_rows$Barcode
  scores
})

# remove temp folder
unlink(tmpobjdir, recursive = TRUE)
# Read the integrated Seurat object back in from disk.
sobjint <- readRDS(paste0(outdir_int_objects, '/Seurat-object_integrated.rds'))

# With label transfer on: attach the per-sample prediction scores as a
# 'predictions' assay and derive one cell-type call per integrated cluster.
if (use_labeltransfer == TRUE) {
  # combine the per-sample prediction matrices column-wise into one sparse matrix
  predmat <- as(as.matrix(dplyr::bind_cols(predictionmats)), "sparseMatrix")
  rownames(predmat) <- rownames(predictionmats[[1]])
  # match order of barcodes to the integrated metadata
  predmat <- predmat[, match(rownames(intmd), colnames(predmat))]
  predassay <- CreateAssayObject(data = predmat)
  sobjint[['predictions']] <- predassay

  ### also predict int cluster-celltype mapping ###
  predmat <- sobjint@assays$predictions@data
  # drop the last row ("max"). seq_len() and drop = FALSE keep this a matrix
  # even when a single cell-type row remains; the earlier 1:(n-1) form
  # collapsed that case to a vector.
  predmat <- predmat[seq_len(nrow(predmat) - 1), , drop = FALSE]

  # for each cluster, find the cell type with the highest mean score
  md <- sobjint@meta.data
  clustname <- 'seurat_clusters'
  clustmaxlist <- lapply(levels(md[, clustname]), function(clust) {
    clustcells <- rownames(md[md[, clustname] == clust, ])
    clustpred <- predmat[, colnames(predmat) %in% clustcells, drop = FALSE]
    clust_avgscores <- Matrix::rowMeans(clustpred)
    maxscore <- clust_avgscores[which.max(clust_avgscores)]
    data.frame(
      cluster = clust, max = names(maxscore), score = maxscore,
      row.names = NULL
    )
  })
  clustmaxdf <- dplyr::bind_rows(clustmaxlist)

  # label starts as the top cell type
  clustmaxdf$label <- paste0(clustmaxdf$max)
  # uncertainty: a score below 0.3 gives NA;
  # a score from 0.3 up to (not including) 0.5 is marked as putative
  clustmaxdf[clustmaxdf$score < 0.3, 'label'] <- NA
  clustmaxdf[clustmaxdf$score >= 0.3 & clustmaxdf$score < 0.5, 'label'] <- paste0(
    clustmaxdf[clustmaxdf$score >= 0.3 & clustmaxdf$score < 0.5, 'label'],
    '_putative'
  )

  # add to metadata: cluster ids mapped onto their predicted labels
  sobjint$INT_celltype_cluster_prediction <- sobjint$seurat_clusters
  sobjint$INT_celltype_cluster_prediction <- plyr::mapvalues(
    sobjint$INT_celltype_cluster_prediction,
    from = levels(sobjint$INT_celltype_cluster_prediction),
    to = clustmaxdf$label
  )

  # write out cluster-celltype mapping
  labeltransfer_outdir_int <- paste0(outdir_int, '/labeltransfer/')
  dir.create(labeltransfer_outdir_int)
  clustcelltypemapfile <- paste0(labeltransfer_outdir_int, '/IntClusterCelltypeMapping.csv')
  write.csv(
    x = clustmaxdf,
    file = clustcelltypemapfile,
    row.names = FALSE, quote = FALSE
  )

  # predictionmats is left for the shared cleanup below, so it is no longer
  # removed twice
  rm(predmat, predassay, clustmaxlist)
}

## clean up env
rm(predictionmats, intmd)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
## read the per-sample raw-count matrices back in and add them to the Seurat object ##
matlist <- readRDS(paste0(outdir_int_objects, '/.concatmatrix.rds'))

# Cell names in the Seurat object carry a "<Set>_" prefix, so build a
# Set / Code lookup (split at the first '_') and prefix each matrix's
# column names accordingly.
set_code <- paste0(sobjint$Set, '_', sobjint$Code)
set_code <- unique(set_code)
set_code <- str_split_fixed(set_code, pattern = '_', n = 2)
matlist <- lapply(seq_along(matlist), function(i) {
  mat <- matlist[[i]]
  code <- names(matlist)[i]
  set <- set_code[set_code[, 2] == code, 1]
  colnames(mat) <- paste0(set, '_', colnames(mat))
  mat
})

# join matrices column-wise and filter genes on the joint matrix
bigmat <- do.call(cbind, matlist)

# Number of cells expressing each gene. nbins forces one entry per gene;
# without it, trailing all-zero genes shorten the vector and the logical mask
# below is recycled against rownames(bigmat).
num_nonzeros <- tabulate(bigmat@i + 1, nbins = nrow(bigmat))

# joint filtered genes: those expressed by >= 3 cells
joint_filt_genes <- rownames(bigmat)[num_nonzeros >= 3]
bigmat <- bigmat[rownames(bigmat) %in% joint_filt_genes, ]

# keep the cells of the Seurat object, in its order
bigmat <- bigmat[, match(colnames(sobjint), colnames(bigmat))]
colnames(bigmat) <- colnames(sobjint)

# add as RNA assay and normalize it, then switch back to the RISC assay
rnaassay <- Seurat::CreateAssayObject(counts = bigmat)
sobjint[['RNA']] <- rnaassay
DefaultAssay(sobjint) <- 'RNA'
sobjint <- NormalizeData(sobjint, assay = 'RNA')
DefaultAssay(sobjint) <- 'RISC'

# fill the counts slot of the RISC assay with expm1 of its data slot
sobjint@assays$RISC@counts <- expm1(sobjint@assays$RISC@data)

# delete the temporary matrix file
unlink(paste0(outdir_int_objects, '/.concatmatrix.rds'))

# clean env: remove first, then collect, so the freed objects are reclaimed
rm(bigmat, joint_filt_genes, num_nonzeros, matlist)
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))

# fix the factor level order of the sample and condition columns
sobjint$Code <- factor(sobjint$Code, levels = sample_metadata$Code)
sobjint$Condition <- factor(sobjint$Condition, levels = levels(sample_metadata$Condition))

# update the saved integrated object (now with RNA assay and any predictions)
saveRDS(sobjint, paste0(outdir_int_objects, '/Seurat-object_integrated.rds'))

# predictionmats and intmd were already removed right after the
# label-transfer step, so they are not removed a second time here
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))
RISC requires the selection of a reference sample from among the samples in the dataset. The optimal reference sample is the one with the greatest diversity of cell types.
# Explanatory text for the InPlot figure; cat() emits it verbatim into the report.
plotlab <- '
One way to select the reference sample is manual inspection of the "InPlot". This plot consists of three panels. The top panel is the most important, and shows the number of clusters detected in each sample across a range of different PC values. Essentially, this is a proxy of the number of cell types detected in the dataset, and the sample with most clusters should be selected as the reference. The second panel allows selection of the optimal PC value to use and the amount of variance each PC has for each sample, with good samples having a higher score (indicating more diversity). The third panel indicates bias of the gene signatures, samples with high scores here should not be selected as the reference.
'
cat(plotlab)
One way to select the reference sample is manual inspection of the “InPlot”. This plot consists of three panels. The top panel is the most important, and shows the number of clusters detected in each sample across a range of different PC values. Essentially, this is a proxy of the number of cell types detected in the dataset, and the sample with most clusters should be selected as the reference. The second panel allows selection of the optimal PC value to use and the amount of variance each PC has for each sample, with good samples having a higher score (indicating more diversity). The third panel indicates bias of the gene signatures, samples with high scores here should not be selected as the reference.
# draw the InPlot panels that were assembled (with patchwork) during integration
print(ip)
# Report text describing the automated reference selection. The single %s
# placeholder is filled with the name of the top-scoring sample.
# NOTE(review): the text contains typos that appear verbatim in the report
# ("number or clusters", "weighing", "sampe"); left unchanged here.
plotlab <- '
## Automated RISC reference selection
We have implemented an automated reference selection algorithm, extending the intuition laid out above. This is based on two metrics: 1) the number of clusters, and 2) the cluster-moderated sample variance.
It is not enough to pick the sample with the most clusters, because sometimes samples have ties in the number of clusters, and cluster number alone can be proportional to number of cells. Thus, we weight the number or clusters by a metric of diversity in the sample which we call the cluster-moderated sample variance.
The cluster-moderated sample variance is calculated first by clustering each sample (ie, using the Seurat Louvain clusters described above), then averaging the sample at the cluster level. Then, the variance of each cluster is calculated, and finally, the mean of the cluster-wise variance is taken.
Then, to calculate the reference selection score, we multiply the number of clusters per sample, times a relative weighing metric based on the number of cells in each sample relative to the sample with the most cells in the dataset (biggest gets a one, sampe with half cells of biggest gets a 0.5); then we multiply this value by the cluster-moderated variance described above.
We show here the resulting reference scores. The top score is selected as the reference. Please note that the autoselection is experimental, so if the score and selected reference deviates from InPlot, we recommend re-running the pipeline and manually selecting the optimal reference.
In this dataset, sample **%s** would be selected as the reference.
'
# names(maxautoscore) is the name of the sample with the highest reference score
cat(sprintf(plotlab, names(maxautoscore)))
We have implemented an automated reference selection algorithm, extending the intuition laid out above. This is based on two metrics: 1) the number of clusters, and 2) the cluster-moderated sample variance.
It is not enough to pick the sample with the most clusters, because sometimes samples have ties in the number of clusters, and cluster number alone can be proportional to number of cells. Thus, we weight the number of clusters by a metric of diversity in the sample which we call the cluster-moderated sample variance.
The cluster-moderated sample variance is calculated first by clustering each sample (ie, using the Seurat Louvain clusters described above), then averaging the sample at the cluster level. Then, the variance of each cluster is calculated, and finally, the mean of the cluster-wise variance is taken.
Then, to calculate the reference selection score, we multiply the number of clusters per sample by a relative weighting metric based on the number of cells in each sample relative to the sample with the most cells in the dataset (the biggest sample gets a 1; a sample with half as many cells as the biggest gets a 0.5); then we multiply this value by the cluster-moderated variance described above.
We show here the resulting reference scores. The top score is selected as the reference. Please note that the autoselection is experimental, so if the score and selected reference deviates from InPlot, we recommend re-running the pipeline and manually selecting the optimal reference.
In this dataset, sample Healthy_2 would be selected as the reference.
# Reference scores as a display table: one row per sample with its code, a
# running "Set-<i>" label, the score rounded to 5 digits, and a '*' marking
# the top-scoring sample.
refscoredf <- data.frame(
  Code = names(refscore),
  Set = paste0("Set-", seq_along(refscore)),
  RefScore = unname(round(refscore, 5)),
  Max = '',
  stringsAsFactors = FALSE
)
refscoredf$Max[maxautoscore] <- '*'
knitr::kable(refscoredf)
| Code | Set | RefScore | Max |
|---|---|---|---|
| Healthy_1 | Set-1 | 8.21950 | |
| Healthy_2 | Set-2 | 9.31144 | * |
| Covid_Mild_1 | Set-3 | 6.81909 | |
| Covid_Mild_2 | Set-4 | 5.03081 | |
| Covid_Critical_1 | Set-5 | 5.24910 | |
| Covid_Critical_2 | Set-6 | 7.78851 | |
# Only when a reference was supplied by the user: add a report section stating
# that it overrides the auto-selection. The three %s placeholders are all
# filled with risc_reference.
if( !(is.null(risc_reference)) ){
plotlab <- '
## Manual reference selection: %s
In this analysis, sample %s was manually selected as the reference sample. Sample %s will be used as the reference, regardless of the auto-selection described above.
'
cat(sprintf(plotlab, risc_reference, risc_reference, risc_reference))
}
# directory for the integration summary plots
outdir_int_plots <- paste0(outdir_int, '/integration_summaryplots/')
dir.create(outdir_int_plots)

# Reference-selection PDF: the InPlot first, then the score table, titled
# according to whether the reference was chosen manually or automatically.
pdf(
  paste0(outdir_int_plots, 'RISC_reference_selection.pdf'),
  width = 6, height = 8
)
print(ip)
print(
  pdftable(
    refscoredf,
    title = if (!is.null(risc_reference)) {
      paste0('Manual reference selected\nSample: ', risc_reference)
    } else {
      paste0('RISC reference\nautoselection')
    }
  )
)
dev.off()
## integrated metadata, used for the alluvial plots
mdint <- sobjint@meta.data

## UMAP views of the integrated data
# every cluster panel is grouped by the RISC cluster column
clustname <- risc_clust_lab

# clusters, labelled
d1_int <- DimPlot(sobjint, group.by = clustname, label = T, repel = T)

# samples, one panel per condition, each panel framed
d2_int <- DimPlot(
  sobjint,
  group.by = 'Code',
  split.by = 'Condition',
  ncol = 2
) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1.1))

# conditions in a single panel
dcond <- DimPlot(sobjint, group.by = 'Condition')

# clusters, one framed panel per condition
dcond_split <- DimPlot(
  sobjint,
  group.by = clustname,
  split.by = 'Condition',
  ncol = 2,
  label = T, repel = T
) +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1.1))
#heatmap of cluster markers
# Prepares the scaled expression matrix (gem) and the column/row annotations
# for the marker heatmap of the integrated clusters.
DefaultAssay(sobjint) <- 'RISC'
# top n markers per cluster, ranked by the score column of the marker table
n <- 5
top <- m_integrated_clusters %>% group_by(cluster) %>% top_n(n = n, wt = score)
genes <- top$gene
#scale the relevant genes
sobjint <- ScaleData(sobjint, features = genes, verbose = verbose)
# (legacy SCT-based handling of missing genes, kept commented out)
# #make sure genes are in
# if( any( !(genes %in% rownames(sobjint@assays$SCT@scale.data)) ) ){
#
# #try getresidual...
# missinggenes <- genes[!(genes %in% rownames(sobjint@assays$SCT@scale.data))]
# sobjint <- GetResidual(sobjint, missinggenes, na.rm = F, replace.value = T)
#
# #it can be complicated doing this after integration, some genes are NAs...
# scgem <- sobjint@assays$SCT@scale.data
#
# if( any( !complete.cases(scgem) ) ){
# scgem <- scgem[complete.cases(scgem),]
# top <- top[top$gene %in% rownames(scgem),]
# sobjint@assays$SCT@scale.data <- scgem
# }
# rm(scgem)
#
# }
#prep heatmap
# keep markers present in the object and pull their scaled values, one row
# per marker entry in the order of `top`
top <- top[top$gene %in% rownames(sobjint),]
gem <- sobjint@assays$RISC@scale.data
gem <- gem[match(top$gene, rownames(gem)),]
#annot for clusters
#first order gem by cluster...
md <- sobjint@meta.data
md <- md[order(md$seurat_clusters),]
gem <- gem[,match(rownames(md), colnames(gem))]
# cluster of every cell (column), in the column order of gem
clust_bc <- setNames(md$seurat_clusters,
nm = colnames(gem)
)
# one hue per cluster level
col_clust <- setNames(scales::hue_pal()(length(levels(sobjint$seurat_clusters))),
nm = levels(sobjint$seurat_clusters))
ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc, col = list(Cluster = col_clust), show_legend = F)
#annot for markers
#set genes according to ct_ordered
# cluster levels follow their order of first appearance in `top`
top$cluster <-factor(top$cluster, levels=unique(top$cluster))
# top <- top[order(top$cluster),]
gem <- gem[match(top$gene, rownames(gem)),]
# cluster each marker gene belongs to, for the row annotation
ct_gene <- setNames(top$cluster,
nm=top$gene)
# row annotation reuses the cluster colours, limited to clusters that have markers
col_gene <- col_clust
col_gene <- col_gene[names(col_gene) %in% top$cluster]
ha_genes <- ComplexHeatmap::rowAnnotation(Cluster = ct_gene, col = list(Cluster = col_gene),
show_annotation_name=F)
# Restrict the scaled values to [-5, 5] for display.
# The lower bound needs the space in `gem < -5`: written as `gem<-5`, R parses
# an assignment inside the index, so nothing was clipped at the low end and a
# single matrix entry was overwritten with -5 instead.
gem[gem > 5] <- 5
gem[gem < -5] <- -5
#actual heatmap
# Per-cell marker heatmap: columns are split by cluster and rows by the
# cluster each marker belongs to; clustering of rows and columns is off so
# the prepared order is kept. Column labels are blanked (one per cell).
hm_int <- ComplexHeatmap::Heatmap(gem,
# column_title = 'Integrated clusters',
column_labels = rep('', ncol(gem)),
row_names_gp = grid::gpar(fontsize = 5),
column_split = md$seurat_clusters,
row_split = top$cluster,
row_title_gp = grid::gpar(fontsize = 5),
row_gap = unit(0.8, "mm"),
column_gap = unit(0.8, "mm"),
row_title_rot = 0,
column_title_rot = 45,
column_title_gp = grid::gpar(fontsize = 7),
name = 'Scaled\nExpression',
cluster_columns = F,
cluster_rows = F,
top_annotation = ha_clust,
left_annotation = ha_genes,
use_raster = F)
### marker average heatmap ###
# Average the (clipped) scaled matrix over the cells of each cluster, giving
# one column per cluster.
avgl <- lapply(levels(md$seurat_clusters), function(clust) {
  mdc <- md[md$seurat_clusters == clust, ]
  # drop = FALSE keeps a matrix when a cluster has a single cell; without it
  # the subset collapses to a vector and rowMeans() fails
  gemc <- gem[, colnames(gem) %in% rownames(mdc), drop = FALSE]
  matrix(rowMeans(gemc), dimnames = list(rownames(gem), clust))
})
avg <- do.call('cbind', avgl)
#need to re-prep column annot
# the averaged matrix has one column per cluster, so the column annotation is
# rebuilt from the (numerically sorted) column names
clust_bc <- factor(str_sort(colnames(avg), numeric = T), levels = str_sort(colnames(avg), numeric = T))
col_clust <- setNames(scales::hue_pal()(length(levels(sobjint$seurat_clusters))),
nm = levels(sobjint$seurat_clusters))
ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc, col = list(Cluster = col_clust), show_legend = F)
# Cluster-averaged marker heatmap: one column per cluster, rows split by the
# cluster each marker belongs to; row and column clustering are off.
hm_int_avg <- ComplexHeatmap::Heatmap(avg,
# column_title = 'Integrated clusters',
row_names_gp = grid::gpar(fontsize = 6),
column_names_gp = grid::gpar(fontsize = 6),
column_split = factor(str_sort(colnames(avg), numeric = T), levels = str_sort(colnames(avg), numeric = T)),
column_title_rot = 0,
column_names_rot = 0,
row_split = top$cluster,
row_title_gp = grid::gpar(fontsize = 6),
row_gap = unit(0.5, "mm"),
column_gap = unit(0.5, "mm"),
row_title_rot = 0,
name = 'Scaled\nExpression',
cluster_columns = F,
cluster_rows = F,
top_annotation = ha_clust,
left_annotation = ha_genes,
use_raster = F)
## update Aug 8: QC violin plots
# one legend-free violin panel per QC metric, arranged in two columns
int_qc_vln <- wrap_plots(
  ncol = 2,
  lapply(
    c('nCount_RNA', 'nFeature_RNA', 'percent.mito', 'percent.hemoglobin'),
    function(metric) {
      VlnPlot(sobjint, metric, pt.size = 0.1) + NoLegend()
    }
  )
)
# DefaultAssay(sobjint) <- 'integrated'

### alluvial plot of condition to cluster
md <- sobjint@meta.data
labelsdf <- md[, c("Condition", clustname)]
ap_cond_to_clust <- alluvialplot(labelsdf)

### alluvial plot of sample to cluster
# built separately for each condition, then combined
mdint <- sobjint@meta.data
ap_samp_to_clust_list <- lapply(unique(sample_metadata$Condition), function(cond) {
  # sample code and cluster of the cells in this condition; factor levels are
  # deliberately not reset, so colors stay the same across panels
  cond_labels <- mdint[mdint$Condition == cond, c("Code", clustname)]
  alluvialplot(cond_labels) + labs(title = cond) + NoLegend()
})
names(ap_samp_to_clust_list) <- unique(sample_metadata$Condition)
ap_samp_to_clust <- wrap_plots(ap_samp_to_clust_list)

# per sample: alluvial plot of individual clusters to integrated clusters
mdint <- sobjint@meta.data
indi_int_clust_ap_list <- lapply(sample_metadata$Code, function(code) {
  # names of the individual and the integrated clustering columns
  indi_col <- paste0("SCT_snn_res.", res_indi)
  int_col <- paste0('RISC_Louvain_npc', pcs_int, '_res', res_int)

  # the two clustering columns for the cells of this sample
  pair <- mdint[mdint$Code == code, ][, c(indi_col, int_col)]

  # individual clusters as a factor in natural (numeric-aware) order
  pair[, 1] <- factor(
    pair[, 1],
    levels = stringr::str_sort(unique(pair[, 1]), numeric = T)
  )
  names(pair)[1] <- 'Individual Clusters'

  scDAPP::alluvialplot(pair)
})
names(indi_int_clust_ap_list) <- sample_metadata$Code
# All integrated summary plots, in the order they are printed to the PDF.
summaryplots_integrated <- list(
  dcond = dcond,
  d2_int = d2_int,
  d1_int = d1_int,
  dcond_split = dcond_split,
  ap_cond_to_clust = ap_cond_to_clust,
  ap_samp_to_clust = ap_samp_to_clust,
  hm_int = hm_int,
  hm_int_avg = hm_int_avg,
  int_qc_vln = int_qc_vln,
  indi_int_clust_ap_list = indi_int_clust_ap_list
)

### write the plots to a pdf in a subdir of outdir_int
outdir_int_plots <- paste0(outdir_int, '/integration_summaryplots/')
dir.create(outdir_int_plots)
pdf(
  paste0(outdir_int_plots, '/summaryplots_integrated.pdf'),
  width = 9, height = 9
)
print(summaryplots_integrated)
dev.off()
### cell type plots
if(use_labeltransfer == T){
#umap of cell type calls (calculated in individual samples)
d3_int <- DimPlot(sobjint, label = T, repel = T, group.by = 'top_celltype_thresholded')
#umap showing top cell type call score (calculated in individual samples)
d4_int <- FeaturePlot(SetIdent(sobjint, value = 'top_celltype_thresholded', ), label = T, repel = T, features = 'top_celltype_call_seurat_score')
#heatmap of the prediction scores per cluster
# use scaled values, comparable between clusters
# get avgs
avgs <- AverageExpression(sobjint, assays = 'predictions', return.seurat = F)
#remove max
avgs <- head( as.data.frame(avgs) , -1)
# remove empty prediction rows with all 0s
avgs <- avgs[Matrix::rowSums(avgs)>0,]
#make column titles nicer
colnames(avgs) <- gsub('predictions.', 'cluster_', colnames(avgs))
#format as numeric matrix
avgs <- as.matrix(avgs)
### scale --> this emphasizes diffs between clusters, seems to put related cell types together
avgs <- t(scale(t(avgs)))
#select middle value for color scale
medval <- mean(avgs)
#plot it
hm_ctscores <- ComplexHeatmap::Heatmap(avgs,
name = 'Scaled\nmean prediction\nscores per cluster',
column_title = 'Integrated',
rect_gp = grid::gpar(col = "white", lwd = 0.5),
circlize::colorRamp2(c(min(avgs), medval, max(avgs)), c("blue", "white", "red")))
#for dotplot, add dendrogram and row label order
# it may throw a warning about drawing plot first etc,
#shouldn't be an issue if we set seed, which we did
suppressWarnings(
dend <- row_dend(hm_ctscores)
)
#get ordered row labels for dotplot and other plots
suppressWarnings(
ct_ordered <- rownames(hm_ctscores@matrix)[ComplexHeatmap::row_order(hm_ctscores)]
)
nicedend <- ggdendro::ggdendrogram(rev(dend), rotate = T) +
scale_y_reverse(expand = c(0.05, 0))+
theme(axis.text.y = element_blank(),
axis.text.x = element_blank())
dp_ctscores <- DotPlot(sobjint, assay = 'predictions', rev(ct_ordered)) +
coord_flip() +
theme(axis.title.y=element_blank(),
axis.text.y = element_text(hjust = 0),
axis.text.x = element_text(size=5) )+
scale_color_gradient2(low = 'blue', high = 'red', mid = 'grey')+
xlab(label = 'Cluster')+
guides(color = guide_colorbar(title = "Scaled Average\nPrediction Score"))
dp_ctscores <- patchwork::wrap_plots(list(nicedend, dp_ctscores), widths = c(0.3,1))
#get exp of the celltypes
rs <- Matrix::rowSums(sobjint@assays$predictions@data)
rs <- head( rs , -1)
#keep only celltypes that are exp
rs <- sort(rs[rs>0], decreasing = T)
ct_in <- names(rs)
# heatmap of reference markers
m_reference <- readRDS(m_reference_path)
#make sure to remove underscores, since seurat doesn't like it in label transfer feature names
m_ref_small <- m_reference
#as of Nov 9 2023 (Seurat v5); add score
m_ref_small$score <- (m_ref_small$pct.1 - m_ref_small$pct.2) * m_ref_small$avg_log2FC
lt_ref_levs <- levels(m_ref_small$cluster)
lt_ref_levs <- gsub('_', '-', lt_ref_levs)
m_ref_small$cluster <- plyr::mapvalues(m_ref_small$cluster, from = levels(m_ref_small$cluster), lt_ref_levs)
m_ref_small <- m_ref_small[m_ref_small$gene %in% rownames(sobjint),]
m_ref_small <- m_ref_small[m_ref_small$cluster %in% ct_in,]
n <- 5
top <- m_ref_small %>% group_by(cluster) %>% top_n(n = n, wt = score)
genes <- top$gene
#scale the relevant genes
sobjint <- ScaleData(sobjint, features = genes, verbose = verbose)
#make sure genes are in
# if( any( !(genes %in% rownames(sobjint@assays$SCT@scale.data)) ) ){
# missinggenes <- genes[!(genes %in% rownames(sobjint@assays$SCT@scale.data))]
# sobjint <- GetResidual(sobjint, genes)
# }
#prep heatmap
top <- top[top$gene %in% rownames(sobjint),]
gem <- sobjint@assays$RISC@scale.data
gem <- gem[match(top$gene, rownames(gem)),]
#annot for clusters
#first order gem by cluster...
md <- sobjint@meta.data
md <- md[order(md$seurat_clusters),]
gem <- gem[,match(rownames(md), colnames(gem))]
clust_bc <- setNames(md$seurat_clusters,
nm = colnames(gem)
)
col_clust <- setNames(scales::hue_pal()(length(levels(sobjint$seurat_clusters))),
nm = levels(sobjint$seurat_clusters))
ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc, col = list(Cluster = col_clust), show_legend = F)
#annot for markers
#set genes according to ct_ordered
top$cluster <-factor(top$cluster, levels=ct_ordered)
top <- top[order(top$cluster),]
gem <- gem[match(top$gene, rownames(gem)),]
ct_gene <- setNames(top$cluster,
nm=top$gene)
coul <- RColorBrewer::brewer.pal(8, "Set2")
coul <- colorRampPalette(coul)(length(unique(top$cluster)))
col_gene <- setNames(coul, nm = unique(top$cluster))
ha_genes <- ComplexHeatmap::rowAnnotation(Celltype = ct_gene, col = list(Celltype = col_gene),
show_annotation_name=F)
# Restrict the scaled values to [-5, 5] for display.
# The lower bound needs the space in `gem < -5`: written as `gem<-5`, R parses
# an assignment inside the index, so nothing was clipped at the low end and a
# single matrix entry was overwritten with -5 instead.
gem[gem > 5] <- 5
gem[gem < -5] <- -5
#actual heatmap
hm_refmarkers <- ComplexHeatmap::Heatmap(gem,
# column_title = 'Integrated clusters',
column_labels = rep('', ncol(gem)),
row_names_gp = grid::gpar(fontsize = 5),
column_split = md$seurat_clusters,
row_split = top$cluster,
row_title_gp = grid::gpar(fontsize = 5),
row_gap = unit(0.8, "mm"),
column_gap = unit(0.8, "mm"),
row_title_rot = 0,
column_title_rot = 45,
column_title_gp = grid::gpar(fontsize = 7),
name = 'Scaled\nExpression',
cluster_columns = F,
cluster_rows = F,
top_annotation = ha_clust,
left_annotation = ha_genes,
use_raster = F)
# Average the (clipped) scaled matrix over the cells of each cluster, giving
# one column per cluster.
avgl <- lapply(levels(md$seurat_clusters), function(clust) {
  mdc <- md[md$seurat_clusters == clust, ]
  # drop = FALSE keeps a matrix when a cluster has a single cell; without it
  # the subset collapses to a vector and rowMeans() fails
  gemc <- gem[, colnames(gem) %in% rownames(mdc), drop = FALSE]
  matrix(rowMeans(gemc), dimnames = list(rownames(gem), clust))
})
avg <- do.call('cbind', avgl)
#need to re-prep column annot
clust_bc <- factor(str_sort(colnames(avg), numeric = T), levels = str_sort(colnames(avg), numeric = T))
col_clust <- setNames(scales::hue_pal()(length(levels(sobjint$seurat_clusters))),
nm = levels(sobjint$seurat_clusters))
ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc, col = list(Cluster = col_clust))
hm_refmarkers_avg <- ComplexHeatmap::Heatmap(avg,
# column_title = 'Integrated clusters',
row_names_gp = grid::gpar(fontsize = 6),
column_names_gp = grid::gpar(fontsize = 6),
column_split = factor(str_sort(colnames(avg), numeric = T), levels = str_sort(colnames(avg), numeric = T)),
column_title_rot = 0,
column_names_rot = 0,
row_split = top$cluster,
row_title_gp = grid::gpar(fontsize = 6),
row_gap = unit(0.5, "mm"),
column_gap = unit(0.5, "mm"),
row_title_rot = 0,
name = 'Scaled\nExpression',
cluster_columns = F,
cluster_rows = F,
top_annotation = ha_clust,
left_annotation = ha_genes,
use_raster = F)
#alluvial plot, int clusters to cell type
# two-column label table (integrated cluster, thresholded top cell type) passed to alluvialplot()
labelsdf <- mdint[,c('seurat_clusters', 'top_celltype_thresholded')]
ap_int_celltypes <- alluvialplot(labelsdf)
# cluster-celltype mapping table; still in mem.
# clustmaxdf
# cluster-celltype mapped umap
# UMAP grouped by the cluster-level cell type prediction column
d5 <- DimPlot(sobjint, group.by = 'INT_celltype_cluster_prediction',
label = T, repel = T)
# Violin plots of each label-transfer score across integrated clusters.
# Cell types are taken in the row order of the score heatmap.
ct_ordered <- rownames(hm_ctscores@matrix)[ComplexHeatmap::row_order(hm_ctscores)]
# scores live in the 'predictions' assay; switch to it only while plotting
DefaultAssay(sobjint) <- 'predictions'
# one list(vct = <violin plot>) entry per cell type, named by cell type
ctplots_int <- setNames(
  lapply(ct_ordered, function(celltype){
    list(vct = VlnPlot(sobjint, celltype, pt.size = 0.1))
  }),
  ct_ordered
)
# restore the integrated assay as default
DefaultAssay(sobjint) <- 'RISC'
# Collect all label-transfer summary objects in display order and write
# them to a single PDF in the integrated label-transfer output folder.
summaryplots_integrated_ct <- list(
  hm_ctscores = hm_ctscores,
  dp_ctscores = dp_ctscores,
  hm_refmarkers = hm_refmarkers,
  hm_refmarkers_avg = hm_refmarkers_avg,
  d3_int = d3_int,
  d4_int = d4_int,
  ap_int_celltypes = ap_int_celltypes,
  clustmaxdf = clustmaxdf,
  d5 = d5,
  ctplots_int = ctplots_int
)
pdf( paste0(labeltransfer_outdir_int, '/LabelTransferSummaryPlots.pdf'), height = 9, width=9 )
for(i in seq_along(summaryplots_integrated_ct) ){
  if(names(summaryplots_integrated_ct)[i] != 'clustmaxdf'){
    # plots (and lists of plots) are printed directly
    print(summaryplots_integrated_ct[[i]])
  } else{
    # the mapping table is passed through scDAPP::pdftable() before printing to the PDF
    print( scDAPP::pdftable(summaryplots_integrated_ct[[i]], title = 'Cluster-Celltype Mapping') )
  }
}
dev.off()
}
# Emit the markdown heading and description for the condition-colored UMAP
# (presumably this chunk is rendered with results = 'asis' -- TODO confirm)
plotlab <- "
## UMAP colored by condition
Here we plot a UMAP of the integrated dataset colored according to the sample’s condition. Overall we expect to observe a good overlap, while some sections may be quite distinct. This can correspond to clusters of cell states or cell types that are present in one sample but absent from another.
"
cat( plotlab )
Here we plot a UMAP of the integrated dataset colored according to the sample’s condition. Overall we expect to observe a good overlap, while some sections may be quite distinct. This can correspond to clusters of cell states or cell types that are present in one sample but absent from another.
# print the condition-colored UMAP stored in summaryplots_integrated
print( summaryplots_integrated$dcond )
# heading and description for the next section (UMAP split by condition)
plotlab <- "
## UMAP split by condition and colored by sample
Here, the integrated UMAP is split by condition, meaning that cells from Condition A are separated from cells from condition B. Additionally, cells are colored according to their sample origin. This allows us to check the overlap of biological replicates from the same condition.
"
cat( plotlab )
Here, the integrated UMAP is split by condition, meaning that cells from Condition A are separated from cells from condition B. Additionally, cells are colored according to their sample origin. This allows us to check the overlap of biological replicates from the same condition.
# print the plot stored as d2_int for the section announced just before
print( summaryplots_integrated$d2_int )
# heading and description for the next section (UMAP colored by integrated clusters)
plotlab <- "
## UMAP colored by integrated clusters
Here we plot the same UMAP but this time colored by the integrated clusters. These clusters are important to characterize, as these are the groups that we will compare one-by-one between conditions.
"
cat( plotlab )
Here we plot the same UMAP but this time colored by the integrated clusters. These clusters are important to characterize, as these are the groups that we will compare one-by-one between conditions.
# print the plot stored as d1_int for the section announced just before
print( summaryplots_integrated$d1_int )
# heading and description for the next section (typo "conditon" fixed in the emitted text)
plotlab <- "
## UMAP split by condition and colored by integrated clusters
Here we plot the UMAP split by condition and colored by integrated clusters. This is useful to see compositional differences in which clusters may be enriched or depleted between conditions.
"
cat( plotlab )
Here we plot the UMAP split by condition and colored by integrated clusters. This is useful to see compositional differences in which clusters may be enriched or depleted between conditions.
# print the plot stored as dcond_split for the section announced just before
print( summaryplots_integrated$dcond_split )
# heading and description for the next section (integrated cluster marker heatmap)
plotlab <- "
## Heatmap of integrated cluster markers
Here we plot the top 5 markers per cluster for the integrated clusters.
"
cat( plotlab )
Here we plot the top 5 markers per cluster for the integrated clusters.
# print the marker heatmap stored as hm_int
print( summaryplots_integrated$hm_int )
# heading and description for the next section (cluster-averaged marker heatmap)
plotlab <- "
## Averaged heatmap of integrated cluster markers
Here we plot the top 5 markers per cluster for the integrated clusters after averaging over each cluster. It is similar to the marker heatmap above, but can help visualize the cluster-wise expression by plotting the cluster averages instead of each individual cell.
"
cat( plotlab )
Here we plot the top 5 markers per cluster for the integrated clusters after averaging over each cluster. It is similar to the marker heatmap above, but can help visualize the cluster-wise expression by plotting the cluster averages instead of each individual cell.
# print the averaged marker heatmap stored as hm_int_avg
print( summaryplots_integrated$hm_int_avg )
# heading and description for the next section (grammar "we plots" fixed in the emitted text)
plotlab <- "
## Violin Plots of quality metrics
Here we plot some quality metrics for each integrated cluster including number of UMIs per cell (nCount_RNA), number of unique genes detected per cell (nFeature_RNA), percent mitochondrial gene expression (percent.mito), and percent hemoglobin gene expression (percent.hemoglobin).
"
cat( plotlab )
Here we plot some quality metrics for each integrated cluster including number of UMIs per cell (nCount_RNA), number of unique genes detected per cell (nFeature_RNA), percent mitochondrial gene expression (percent.mito), and percent hemoglobin gene expression (percent.hemoglobin).
# print the QC violin plots stored as int_qc_vln
print( summaryplots_integrated$int_qc_vln )
# heading and description for the next section (condition-to-cluster alluvial plot)
plotlab <- "
## Alluvial plot mapping condition to integrated clusters
This alluvial plot allows us to see the contribution of each condition to each cluster. This allows us to visualize compositional differences between conditions clearly.
"
cat( plotlab )
This alluvial plot allows us to see the contribution of each condition to each cluster. This allows us to visualize compositional differences between conditions clearly.
# print the condition-to-cluster alluvial plot
print( summaryplots_integrated$ap_cond_to_clust )
# heading and description for the next section (sample-to-cluster alluvial plot)
plotlab <- "
## Alluvial plot mapping samples to integrated clusters
This alluvial plot allows us to see the contribution of each sample to each cluster. Combined with the condition-focused alluvial plot above, this allows us to verify compositional patterns across biological replicates.
"
cat( plotlab )
This alluvial plot allows us to see the contribution of each sample to each cluster. Combined with the condition-focused alluvial plot above, this allows us to verify compositional patterns across biological replicates.
# print the sample-to-cluster alluvial plot
print( summaryplots_integrated$ap_samp_to_clust )
# heading and description for the next section (typo "indiviudal" fixed in the emitted heading)
plotlab <- "
## Per-sample alluvial plots mapping individual sample clusters to integrated clusters
For each sample, we plot an alluvial plot mapping the individual sample clusters with the integrated clusters.
"
cat( plotlab )
For each sample, we plot an alluvial plot mapping the individual sample clusters with the integrated clusters.
# Print one sub-section (heading + alluvial plot) per entry of the per-sample list.
indi_int_clust_ap_list <- summaryplots_integrated$indi_int_clust_ap_list
# seq_along() yields an empty sequence for an empty list, whereas
# c(1:length(x)) would iterate over 1 and 0 and fail on the subscript
for(i in seq_along(indi_int_clust_ap_list) ){
  name=names(indi_int_clust_ap_list)[i]
  # %s is filled with the list entry name via sprintf() below
  plotlab <- "
### %s
Individual Clusters (left) mapped to Integrated Clusters (right)
"
  cat(sprintf(plotlab, name))
  print(indi_int_clust_ap_list[[i]])
}
Individual Clusters (left) mapped to Integrated Clusters (right)
Individual Clusters (left) mapped to Integrated Clusters (right)
Individual Clusters (left) mapped to Integrated Clusters (right)
Individual Clusters (left) mapped to Integrated Clusters (right)
Individual Clusters (left) mapped to Integrated Clusters (right)
Individual Clusters (left) mapped to Integrated Clusters (right)
# the local copy of the per-sample plot list is no longer needed
rm(indi_int_clust_ap_list)
# print( summaryplots_integrated$indi_int_clust_ap_list )
# Only when label transfer is enabled: emit the top-level heading and
# introduction of the integrated label transfer summary.
if(use_labeltransfer == T){
text <- '
# Integrated label transfer summary
Above, we explored the integrated clusters. Here, we review the cell type scores from label transfer. The goal is to understand which cell types make up each cluster.
Each cell is a mixture of cells. Using label transfer, we can get a score for each cell type in the tissue. If a cell is composed of one cell type, it will have a high score for that cell type only. If a cell is composed of a mix of cell types, it may have a moderately high score for two or more cell types.
As mentioned above, each cell may contain a single cell or a mixture of cells and potentially multiple cell types. Thus, assigning a single label to each cell is difficult. Nevertheless, we use the label transfer scores to detect patterns of cell types in each cluster.
Note that we apply a label transfer score cutoff of 0.3. If the cells are below this, they are marked as NA. These may represent cells missing from the reference.
'
cat(text)
}
Above, we explored the integrated clusters. Here, we review the cell type scores from label transfer. The goal is to understand which cell types make up each cluster.
Each cell is a mixture of cells. Using label transfer, we can get a score for each cell type in the tissue. If a cell is composed of one cell type, it will have a high score for that cell type only. If a cell is composed of a mix of cell types, it may have a moderately high score for two or more cell types.
As mentioned above, each cell may contain a single cell or a mixture of cells and potentially multiple cell types. Thus, assigning a single label to each cell is difficult. Nevertheless, we use the label transfer scores to detect patterns of cell types in each cluster.
Note that we apply a label transfer score cutoff of 0.3. If the cells are below this, they are marked as NA. These may represent cells missing from the reference.
# Label-transfer report section: for each stored summary object, emit its
# markdown heading/description and then print the object. The whole section
# is skipped unless label transfer was enabled.
if(use_labeltransfer == TRUE){
  plotlab <- "
## Heatmap of cell type label transfer scores with integrated clusters
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$hm_ctscores)
  plotlab <- "
## Dotplot of label transfer cell type scores with integrated clusters
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above. However, the dotplot also includes information about how many cells express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A small dot indicates few cells express the score.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$dp_ctscores)
  plotlab <- "
## Heatmap of reference cell type markers in integrated data
Here we plot the top 5 markers of cell types as sorted by average log2 fold change, which are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type, for example a cluster with high T cell score should express high T cell markers.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$hm_refmarkers)
  plotlab <- "
## Heatmap of reference cell type markers averaged across clusters
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$hm_refmarkers_avg)
  plotlab <- "
## UMAP of Top Scoring Celltype
The top scoring celltype is plotted on the UMAP.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$d3_int)
  plotlab <- "
## UMAP of label transfer prediction score
The label transfer prediction score is plotted on the UMAP. This is a measure of confidence. Low scoring cells may indicate a new cell in the data that was not present in the reference.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$d4_int)
  plotlab <- "
## Alluvial plot mapping between Louvain clusters to top celltypes
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$ap_int_celltypes)
  # typo "independnetly" fixed in the emitted text below
  plotlab <- '
## Table showing cluster-celltype mapping
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset.
Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix "_putative" to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
'
  cat( plotlab )
  # round the score column for display only; the stored table is untouched
  clustmaxprint <- summaryplots_integrated_ct$clustmaxdf
  clustmaxprint$score <- round(clustmaxprint$score, digits = 5)
  print(knitr::kable(clustmaxprint))
  plotlab <- "
## UMAP of cluster-celltype mapping
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
"
  cat( plotlab )
  print(summaryplots_integrated_ct$d5)
  #CELLTYPE FEATUREPLOTS
  plotlab <- "
## Reference cell types plotted for integrated data for all cell types
Here we plot a summary panel to analyze the label transfer scores for each cell type in detail. To help identify which cluster strongly matches the cell type, the scores are plotted as Violin Plots.
"
  cat( plotlab )
  # per-cell-type sub-heading; %s is filled with the cell type name
  template_int_celltypes <- "
### %s
"
  invisible(
    # seq_along() is safe for an empty plot list; c(1:length(x)) would
    # iterate over 1 and 0 and fail on the subscript
    for( i in seq_along(summaryplots_integrated_ct$ctplots_int) ){
      ct = names(summaryplots_integrated_ct$ctplots_int)[i]
      cat(sprintf(template_int_celltypes, ct))
      thisct_plots <- summaryplots_integrated_ct$ctplots_int[[i]]
      print(thisct_plots$vct)
      # invisible(lapply(thisct_plots$ctplots_conds, print))
    }
  )
}
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters.
Here we plot the label transfer scores, which are used to predict cell types from the reference single-cell RNAseq dataset. The values are scaled, allowing easy comparison across clusters. It is very similar to the heatmap above. However, the dotplot also includes information about how many cells express the cell type score. A big dot indicates many cells in that cluster express it at non-zero level. A big gray dot indicates widespread, low score among cells. A big red dot indicates widespread, high score among cells. A small dot indicates few cells express the score.
Here we plot the top 5 markers of cell types as sorted by average log2 fold change, which are derived from the reference dataset. Since these are the top specific markers of each cell type in the reference, their expression pattern should match clusters with a high label transfer score for each given cell type, for example a cluster with high T cell score should express high T cell markers.
Here we plot the top 5 reference cell type markers by average log2 fold change, this time using the average cluster expression. The markers are derived from the reference dataset. Using averages allows us to easily see which clusters highly or lowly express the reference cell type markers.
The top scoring celltype is plotted on the UMAP.
The label transfer prediction score is plotted on the UMAP. This is a measure of confidence. Low scoring cells may indicate a new cell in the data that was not present in the reference.
This alluvial plot maps cluster labels to top celltype labels for each cell. This can be useful to identify general trends of celltype score per cluster.
We apply an ad-hoc method to extend the label transfer from individual cells (default) across entire clusters. We do this by taking the average score of each cell type score in the cluster and choosing the max. This can help simplify the analysis and can be less noisy by sharing information across cells, but may also hide intra-cluster variability, such as particular sub-clusters which may correspond to distinct cell sub-types.
We also apply a thresholding approach such that clusters with a max score of less than 0.3 are considered non-transferrable and marked as NA (not applicable). These may reflect clusters of cells which are not in the reference dataset. Additionally, clusters with a score between 0.3 - 0.5 are marked with the suffix “_putative” to indicate the somewhat uncertain status. All cluster markers should be carefully inspected and checked for cell type, independently of the label transfer results.
| cluster | max | score | label |
|---|---|---|---|
| 1 | NK | 0.77652 | NK |
| 2 | B | 1.00000 | B |
| 3 | T | 0.99582 | T |
| 4 | MNP | 0.99599 | MNP |
| 5 | MNP | 1.00000 | MNP |
| 6 | MNP | 0.94171 | MNP |
| 7 | T | 0.78191 | T |
As described above, we extend the label transfer prediction from individual cells to whole clusters in order to denoise and simplify the analysis. Here, rather than plotting the individual cell predictions, we plot the cluster-level predictions.
Here we plot a summary panel to analyze the label transfer scores for each cell type in detail. To help identify which cluster strongly matches the cell type, the scores are plotted as Violin Plots.
Here, we test for differences in the abundance of celltypes (clusters) between conditions. This is referred to as compositional analysis.
If we are comparing two conditions such as treated versus untreated or knockout vs wildtype, one biological effect we can try to test for is the difference in the cell type compositional abundance. The idea that tissue cell type compositions can change in response to biological stimuli has been called “polycreodism” and may reflect particular cell type sensitivity to the perturbation (Lappalainen & Greally 2017). For example, it would likely be of interest to note that a knockout or drug completely ablates a certain celltype, reduces another by half, increases another by 25%, and leaves others unchanged. Biologically, these kinds of differences can be explained by various mechanisms, such as differentiation of certain cell types being specifically impaired by a knockout, or specific cell types being particularly sensitive to a drug.
We use the scRNA-seq data to test for differential compositional abundance of each cluster. If replicates are present, we make use of these to test differential abundance across conditions using the “Propeller” method from the R package “speckle” (Phipson et al 2022). This method calculates cell proportions for each sample, applies a variance stabilizing transformation, and then compares proportion across conditions via linear regression. It is similar to pseudobulk differential expression analysis in that it takes into account having multiple biological replicate samples for each condition. Specifically, we use the arcsin square root transformation by setting “transform = asin”. We picked this test and this transformation based on the results of a benchmarking study comparing compositional analysis methods (Simmons 2022). Though Propeller calculates False Discovery Rates (FDR), we define significance using only nominal P value for power reasons.
If replicates are not available, we use an ad-hoc method that relies
on the R prop.test() function, which is very similar to the
ChiSquare test (see this
forum discussion). This works by calculating the proportion of cells
in each condition and comparing the proportions. Because there are often
thousands of cells being compared, P values can be extremely low.
However, this method can result in false-positive conclusions, as the
results of 1-vs-1 sample comparisons may not extend to the populations
the samples are drawn from. Thus, we advise caution, and interpretation
without replicates should be considered as preliminary / pilot data.
# Run the compositional (differential abundance) analysis on the integrated clusters.
# test choice follows Pseudobulk_mode: 'propeller' when TRUE, 'chisq' otherwise
compositional_test <- ifelse(Pseudobulk_mode == T, yes = 'propeller', no = 'chisq' )
comp_result <- compositional_analysis_module(sobjint = sobjint,
comps = comps,
sample_metadata = sample_metadata,
outdir_int = outdir_int,
grouping_variable = 'seurat_clusters',
compositional_test = compositional_test)
# unpack the two result components used below, then drop the wrapper list
composition_comps <- comp_result$composition_comps
globalcomposition <- comp_result$globalcomposition
rm(comp_result)
# heading and description for the global composition tables
plotlab <- '
## Global cell numbers and proportions
Here we show tables of cell numbers and proportions. These can be used to calculate proportion shifts.
'
cat(plotlab)
Here we show tables of cell numbers and proportions. These can be used to calculate proportion shifts.
# heading and description for the cell-number table
plotlab <- '
### Table of cell numbers
We display a table of cell numbers for each sample and cluster.
'
cat(plotlab)
We display a table of cell numbers for each sample and cluster.
# Cell counts per cluster (rows) and sample (columns), with the row names
# repeated as a leading 'Cluster' column for display.
cellstab <- as.data.frame.matrix(globalcomposition$cellstab)
cellstab <- cbind(Cluster = rownames(cellstab), cellstab)
knitr::kable(cellstab)
| Cluster | Healthy_1 | Healthy_2 | Covid_Mild_1 | Covid_Mild_2 | Covid_Critical_1 | Covid_Critical_2 |
|---|---|---|---|---|---|---|
| 1 | 254 | 196 | 236 | 282 | 83 | 201 |
| 2 | 199 | 187 | 193 | 98 | 191 | 193 |
| 3 | 132 | 183 | 155 | 102 | 186 | 158 |
| 4 | 111 | 141 | 148 | 170 | 180 | 162 |
| 5 | 47 | 17 | 12 | 8 | 0 | 3 |
| 6 | 7 | 17 | 9 | 6 | 2 | 2 |
| 7 | 0 | 1 | 3 | 1 | 7 | 25 |
# heading and description for the cell-proportion table
plotlab <- '
### Table of cell proportions
We display a table of cell proportions for each sample and cluster. This is calculated by dividing each column by the sample total. We then round to the third digit for display purposes (though the full proportion table is saved in outputs).
'
cat(plotlab)
We display a table of cell proportions for each sample and cluster. This is calculated by dividing each column by the sample total. We then round to the third digit for display purposes (though the full proportion table is saved in outputs).
# Proportion table for display: values rounded to 3 digits, with the row
# names repeated as a leading 'Cluster' column. `proptab` keeps full precision.
proptab <- as.data.frame.matrix(globalcomposition$proptab)
printproptab <- cbind(Cluster = rownames(proptab), round(proptab, 3))
knitr::kable(printproptab)
| Cluster | Healthy_1 | Healthy_2 | Covid_Mild_1 | Covid_Mild_2 | Covid_Critical_1 | Covid_Critical_2 |
|---|---|---|---|---|---|---|
| 1 | 0.339 | 0.264 | 0.312 | 0.423 | 0.128 | 0.270 |
| 2 | 0.265 | 0.252 | 0.255 | 0.147 | 0.294 | 0.259 |
| 3 | 0.176 | 0.247 | 0.205 | 0.153 | 0.287 | 0.212 |
| 4 | 0.148 | 0.190 | 0.196 | 0.255 | 0.277 | 0.218 |
| 5 | 0.063 | 0.023 | 0.016 | 0.012 | 0.000 | 0.004 |
| 6 | 0.009 | 0.023 | 0.012 | 0.009 | 0.003 | 0.003 |
| 7 | 0.000 | 0.001 | 0.004 | 0.001 | 0.011 | 0.034 |
# heading and description for the proportion heatmap
plotlab <- '
### Heatmap of cell proportions in each sample
Here we display a heatmap-style table of cell proportions for each sample and cluster. It is similar to the table above, but we have turned on hierarchical clustering of the rows, to try to visualize patterns of abundance among the clusters.
'
cat(plotlab)
Here we display a heatmap-style table of cell proportions for each sample and cluster. It is similar to the table above, but we have turned on hierarchical clustering of the rows, to try to visualize patterns of abundance among the clusters.
# print the proportion heatmap stored in the global composition results
print(globalcomposition$hmprop)
# heading and description for the cross-condition differential abundance section
plotlab <- '
## Differential abundance compositional analysis across conditions
Here we summarize differential abundance across conditions as defined in the cross-condition comparisons provided.
'
cat(plotlab)
Here we summarize differential abundance across conditions as defined in the cross-condition comparisons provided.
# One-sentence note on which compositional test was run, chosen by Pseudobulk_mode.
plotlab <- if(Pseudobulk_mode == T){
  '
Replicates were provided, so we run the propeller test to take into account abundance in each replicate in the cross-condition comparison.
'
} else{
  '
This analysis was run with the prop.test() function in R to compare overall cell proportions between conditions.
'
}
cat(plotlab)
Replicates were provided, so we run the propeller test to take into account abundance in each replicate in the cross-condition comparison.
# Report the differential-abundance result of every cross-condition comparison:
# a sub-heading, the heatmap, and a rounded results table.
# seq_len() gives an empty sequence when `comps` has no rows (1:nrow would give 1, 0)
compslen <- seq_len(nrow(comps))
compidx = 1 #for testing
#prep names
comps$labels <- paste0(comps$c1, '_vs_', comps$c2)
for(compidx in compslen ){
  #get comparison condition levels
  c1 <- comps[compidx,1]
  c2 <- comps[compidx,2]
  #get comp lab
  lab <- comps[compidx,3]
  #get comp analysis
  diffcomp <- composition_comps[[lab]]
  #get testused
  # was `ifelse(T == T, ...)`, which always reported 'propeller' even when the
  # prop.test() path was run; the name must follow Pseudobulk_mode
  comptestused <- if(Pseudobulk_mode == TRUE) 'propeller' else 'R prop.test()'
  plotlab <- '
### %s
'
  cat(sprintf(plotlab, lab))
  ## print heatmap
  plotlab <- '
#### Heatmap of compositional analysis results
Here we plot a heatmap of the cell proportions and compositional analysis from the %s analysis.
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05.
'
  cat(sprintf(plotlab, comptestused))
  print(diffcomp$hmprop_comp)
  ## print table
  plotlab <- '
#### Table of compositional analysis results
Here we plot a table of the cell proportions and compositional analysis from the %s analysis. We round to the 3rd digit for display, which can result in low P values being shown as zeros (though the full analysis is saved in the output folder).
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05. FDR is not used to define significance but is provided for convenience.
'
  cat(sprintf(plotlab, comptestused))
  # round every column except the first (the cluster label) for display only
  print_compres <- diffcomp$compres
  print_compres[,-1] <- round(print_compres[,-1], 3)
  print(knitr::kable(print_compres))
}
Here we plot a heatmap of the cell proportions and compositional analysis from the propeller analysis.
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05.
Here we plot a table of the cell proportions and compositional analysis from the propeller analysis. We round to the 3rd digit for display, which can result in low P values being shown as zeros (though the full analysis is saved in the output folder).
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05. FDR is not used to define significance but is provided for convenience.
| BaselineProp.clusters | BaselineProp.Freq | PropMean.Covid_Critical | PropMean.Healthy | PropRatio | Tstatistic | P.Value | FDR | |
|---|---|---|---|---|---|---|---|---|
| 7 | * 7 * | 0.011 | 0.022 | 0.001 | 32.936 | 2.231 | 0.043 | 0.104 |
| 4 | 4 | 0.206 | 0.248 | 0.169 | 1.465 | 1.721 | 0.107 | 0.188 |
| 3 | 3 | 0.228 | 0.249 | 0.211 | 1.181 | 0.809 | 0.432 | 0.504 |
| 2 | 2 | 0.267 | 0.277 | 0.259 | 1.070 | 0.361 | 0.723 | 0.723 |
| 1 | * 1 * | 0.254 | 0.199 | 0.301 | 0.660 | -2.204 | 0.045 | 0.104 |
| 6 | 6 | 0.010 | 0.003 | 0.016 | 0.179 | -1.253 | 0.231 | 0.323 |
| 5 | * 5 * | 0.023 | 0.002 | 0.043 | 0.047 | -3.027 | 0.009 | 0.063 |
Here we plot a heatmap of the cell proportions and compositional analysis from the propeller analysis.
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05.
Here we plot a table of the cell proportions and compositional analysis from the propeller analysis. We round to the 3rd digit for display, which can result in low P values being shown as zeros (though the full analysis is saved in the output folder).
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05. FDR is not used to define significance but is provided for convenience.
| BaselineProp.clusters | BaselineProp.Freq | PropMean.Covid_Mild | PropMean.Healthy | PropRatio | Tstatistic | P.Value | FDR | |
|---|---|---|---|---|---|---|---|---|
| 7 | 7 | 0.002 | 0.003 | 0.001 | 4.057 | 0.630 | 0.539 | 0.629 |
| 4 | 4 | 0.196 | 0.225 | 0.169 | 1.333 | 1.371 | 0.192 | 0.345 |
| 1 | 1 | 0.332 | 0.367 | 0.301 | 1.219 | 1.353 | 0.197 | 0.345 |
| 3 | 3 | 0.196 | 0.179 | 0.211 | 0.847 | -0.784 | 0.446 | 0.625 |
| 2 | 2 | 0.232 | 0.201 | 0.259 | 0.777 | -1.394 | 0.185 | 0.345 |
| 6 | 6 | 0.013 | 0.010 | 0.016 | 0.648 | -0.430 | 0.674 | 0.674 |
| 5 | 5 | 0.029 | 0.014 | 0.043 | 0.326 | -1.635 | 0.124 | 0.345 |
Here we plot a heatmap of the cell proportions and compositional analysis from the propeller analysis.
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05.
Here we plot a table of the cell proportions and compositional analysis from the propeller analysis. We round to the 3rd digit for display, which can result in low P values being shown as zeros (though the full analysis is saved in the output folder).
Clusters with significant compositional differences are labelled with two asterisks (ie * 1 *). Significance is defined as P < 0.05. FDR is not used to define significance but is provided for convenience.
| BaselineProp.clusters | BaselineProp.Freq | PropMean.Covid_Critical | PropMean.Covid_Mild | PropRatio | Tstatistic | P.Value | FDR | |
|---|---|---|---|---|---|---|---|---|
| 7 | 7 | 0.013 | 0.022 | 0.003 | 8.119 | 2.168 | 0.089 | 0.208 |
| 3 | 3 | 0.213 | 0.249 | 0.179 | 1.394 | 1.752 | 0.147 | 0.243 |
| 2 | 2 | 0.240 | 0.277 | 0.201 | 1.377 | 1.602 | 0.177 | 0.243 |
| 4 | 4 | 0.234 | 0.248 | 0.225 | 1.099 | 0.565 | 0.599 | 0.599 |
| 1 | 1 | 0.285 | 0.199 | 0.367 | 0.542 | -2.460 | 0.063 | 0.208 |
| 6 | 6 | 0.007 | 0.003 | 0.010 | 0.276 | -1.471 | 0.208 | 0.243 |
| 5 | 5 | 0.008 | 0.002 | 0.014 | 0.145 | -2.195 | 0.086 | 0.208 |
### clean up env
# drop the compositional-analysis objects, then run a full garbage collection
# without printing its summary
rm(print_compres, diffcomp, composition_comps, globalcomposition)
invisible(gc(full = T, reset = F, verbose = F))
Below we summarize the results of cross-condition differential expression (DE) analysis, for example KO vs WT, or disease vs healthy, or drugged vs control. As in single-cell data, we stratify the cross-condition DE by cluster. In other words, using the clusters defined above, we compare KO vs WT in cluster 1, cluster 2, and so on. For these types of analysis, it is recommended to have multiple biological replicates per condition.
If multiple replicates are available, then the best way to take advantage of these is to use a “pseudobulk” strategy. This refers to an approach in which cells from each sample are pooled together, and bulk RNA-seq analysis methods are used to identify DE genes across conditions. Pseudobulk differential expression analysis is done with EdgeR likelihood ratio test, as recommended by Squair et al 2021 Nat Com.
If replicates are not available, it is still possible to compare conditions. This is done using a wilcoxon test approach. Drawing conclusions from the results of this type of analysis are inherently limited as they constitute “N of 1” experiments, but can be considered as preliminary/pilot data.
# Describe which differential expression approach was used, based on Pseudobulk_mode.
if(Pseudobulk_mode == T){
  detestused_text <- 'The differential expression analysis here used a pseudobulk edgeR approach. First, cells are "pseudobulked", or combined, at the cluster level by adding up the gene UMI counts in all cells in each cluster for each replicate. Next, EdgeR with the likelihood ratio test is applied.'
} else if(Pseudobulk_mode == F){
  detestused_text <- 'The differential expression analysis here used a non-pseudobulk Wilcoxon test approach. The normalized counts of each gene are compared between cells in condition A vs B in each cluster.'
}
cat(detestused_text)
The differential expression analysis here used a pseudobulk edgeR approach. First, cells are “pseudobulked”, or combined, at the cluster level by adding up the gene UMI counts in all cells in each cluster for each replicate. Next, EdgeR with the likelihood ratio test is applied.
# Label every comparison as "<c1>_vs_<c2>"
comps$labels <- paste0(comps$c1, '_vs_', comps$c2)

# open question kept from the original: read sobjlist back in, or keep it
# in memory? memory use will need optimizing

# name of the cluster label object used as the grouping variable
clustname <- risc_clust_lab

# Run the cross-condition DE module once, stratified by cluster.
# Pseudobulk mode works on raw RNA counts with a 0.1 min.pct setting;
# otherwise the RISC 'data' slot is used with min.pct 0.
m_bycluster_crosscondition_de_comps <- if (Pseudobulk_mode == TRUE) {
  de_across_conditions_module(
    sobjint = sobjint,
    sample_metadata = sample_metadata,
    comps = comps,
    grouping_variable = clustname,
    outdir_int = outdir_int,
    assay = 'RNA', slot = 'counts',
    Pseudobulk_mode = TRUE,
    cluster_prefix = TRUE,
    crossconditionDE_padj_thres = crossconditionDE_padj_thres,
    crossconditionDE_lfc_thres = crossconditionDE_lfc_thres,
    crossconditionDE_min.pct = 0.1
  )
} else {
  de_across_conditions_module(
    sobjint = sobjint,
    sample_metadata = sample_metadata,
    comps = comps,
    grouping_variable = clustname,
    outdir_int = outdir_int,
    assay = 'RISC', slot = 'data',
    Pseudobulk_mode = FALSE,
    cluster_prefix = TRUE,
    crossconditionDE_padj_thres = crossconditionDE_padj_thres,
    crossconditionDE_lfc_thres = crossconditionDE_lfc_thres,
    crossconditionDE_min.pct = 0
  )
}
# ---- Cross-condition DE: per-comparison, per-cluster DEG heatmaps ----
# For every comparison (row of `comps`) this keeps, per cluster, the DEGs that
# pass the reporting thresholds and draws two heatmaps per cluster:
#   1. scaled RISC values of the DEGs for the single cells of that cluster
#   2. a sample-level view: scaled log1p pseudobulk values taken from the DE
#      table (pseudobulk mode) or scaled per-sample mean RISC values (otherwise)
# Result: `DEplots_comps`, a list named by comps$labels; each element is
#   list(cluster_de_heatmap_l = <list per cluster: hm_de_clust, hm_de_clust_pb>)
# Side effects: creates outdir_DE_plots and writes one PDF per comparison.
# Clusters without any DEG passing the thresholds get no heatmaps; a comparison
# with no such cluster at all yields an empty list instead of an error.

compslen <- seq_len(nrow(comps))
# compidx <- 1 # uncomment to step through the function body interactively
outdir_DE <- paste0(outdir_int, '/differentialexpression_crosscondition/')
outdir_DE_plots <- paste0(outdir_DE, '/plots/')
dir.create(outdir_DE_plots, recursive = TRUE)

DEplots_comps <- lapply(compslen, function(compidx) {

  # comparison condition levels and label; looked up by column name to agree
  # with how comps$labels is built and how the result list is named below
  c1 <- comps$c1[compidx]
  c2 <- comps$c2[compidx]
  lab <- comps$labels[compidx]
  message(lab)

  # cross-condition results for this comparison: one DE table per cluster
  m_bycluster_crosscondition_de <- m_bycluster_crosscondition_de_comps[[compidx]]

  # minimum pct.1 (positive logFC) / pct.2 (negative logFC) a DEG must exceed
  crossconditionDE_min.pct <- if (Pseudobulk_mode == TRUE) 0.1 else 0

  # Keep rows passing the FDR and |logFC| thresholds, then the
  # direction-specific pct threshold; up-regulated rows come first.
  filter_sig_degs <- function(m) {
    m <- m[m$FDR < crossconditionDE_padj_thres, , drop = FALSE]
    m <- m[abs(m$logFC) > crossconditionDE_lfc_thres, , drop = FALSE]
    upm <- m[m$logFC > 0, , drop = FALSE]
    upm <- upm[upm$pct.1 > crossconditionDE_min.pct, , drop = FALSE]
    dnm <- m[m$logFC < 0, , drop = FALSE]
    dnm <- dnm[dnm$pct.2 > crossconditionDE_min.pct, , drop = FALSE]
    rbind(upm, dnm)
  }

  # Draw a heatmap on a throw-away null device so nothing is rendered here;
  # the device is closed even if draw() fails.
  draw_offscreen <- function(hm, title) {
    grDevices::pdf(NULL)
    on.exit(grDevices::dev.off(), add = TRUE)
    invisible(
      draw(hm,
           column_title = title,
           row_title = 'Log Fold Change Sign')
    )
  }

  # number of DEGs per cluster: column 1 = negative logFC (higher in c2),
  # column 2 = positive logFC (higher in c1)
  numdegs <- sapply(m_bycluster_crosscondition_de, function(m) {
    m <- filter_sig_degs(m)
    try(table(factor(sign(m$logFC), levels = c(-1, 1))))
  })
  numdegs <- t(numdegs)
  colnames(numdegs) <- c(c2, c1)

  ### at least one DEG must be present ###
  # drop = FALSE keeps the matrix shape (and rownames) when a single cluster
  # survives, so nrow()/rownames() below stay valid
  numdegs <- numdegs[(numdegs[, 1] > 0) | (numdegs[, 2] > 0), , drop = FALSE]

  # samples belonging to the two compared conditions
  subpmd <- sample_metadata[sample_metadata$Condition %in% c(c1, c2), ]

  # for each cluster with DEGs, get the filtered up/dn DEG table
  # seq_len() yields an empty loop (not c(1, 0)) when no cluster is left
  clustres <- lapply(seq_len(nrow(numdegs)), function(i) {

    clustlab <- rownames(numdegs)[i]
    m <- filter_sig_degs(m_bycluster_crosscondition_de[[clustlab]])

    # pseudobulk only: make sure every sample of the comparison has a column
    if (Pseudobulk_mode == TRUE) {

      missingcodes <- subpmd$Code[!(subpmd$Code %in% colnames(m))]

      if (length(missingcodes) > 0) {
        # samples without a column in this table get an all-zero column
        missingcodecols <- lapply(missingcodes, function(code) {
          missingdf <- data.frame(missing = rep(0, nrow(m)))
          colnames(missingdf) <- code
          return(missingdf)
        })
        missingcodecols <- dplyr::bind_cols(missingcodecols)

        # bind missing cols to results
        m <- cbind(m, missingcodecols)
      }

      # make the sample column order consistent: statistics first, then the
      # sample columns in sample_metadata order
      m_nosamp <- m[, !(colnames(m) %in% subpmd$Code), drop = FALSE]
      msamp <- m[, colnames(m) %in% subpmd$Code, drop = FALSE]
      msamp <- msamp[, match(subpmd$Code, colnames(msamp)), drop = FALSE]
      m <- cbind(m_nosamp, msamp)
    }

    m$cluster <- clustlab
    m
  })
  names(clustres) <- rownames(numdegs)

  # ---- disabled: summary heatmaps (hm_DE, hm_DE_AVG) ----
  # The code below is intentionally commented out and kept for reference only.
  #
  # clustres <- do.call(rbind, clustres)
  # ### summary heatmap ###
  #
  # # first make a summary heatmap, top 3 DEGs from each cluster up/dn...
  #
  # top <- lapply(clustres, function(m){
  # top <- head( m[m$logFC > 0,,drop=F] , n = 3 )
  # btm <- tail( m[m$logFC < 0,,drop=F] , n = 3 )
  #
  # subm <- rbind(top,btm)
  #
  # return(subm)
  # })
  #
  # top <- do.call(rbind, top)
  # rownames( top ) <- NULL
  #
  # #to make things easier, remove "cluster_"
  # top$cluster <- gsub('cluster_', '', top$cluster)
  #
  # genes <- top$gene_symbol
  #
  # #subset for just these conditions
  # md <- sobjint@meta.data
  # md <- md[md$Condition %in% c(c1,c2),]
  # sobjsub <- sobjint[,rownames(md)]
  #
  # #scale the relevant genes
  # # sobjsub <- ScaleData(sobjsub, features = genes, verbose = verbose)
  #
  # # get the gene matrix
  # # gem <- sobjsub@assays$RISC@scale.data
  # gem <- sobjsub@assays$RISC@data
  #
  # #prep heatmap
  # top <- top[top$gene %in% rownames(sobjsub),]
  # gem <- gem[match(genes, rownames(gem)),]
  #
  # #as matrix
  # gem <- as.matrix(gem)
  #
  # #annot for clusters
  #
  # #first order gem by cluster...
  # md <- sobjsub@meta.data
  # md <- md[order(md$seurat_clusters),]
  #
  # #within cluster, order by condition...
  # mdx <- lapply( unique(md$seurat_clusters) , function(clust){
  # mdsub <- md[md$seurat_clusters==clust,]
  #
  # mdc1 <- mdsub[mdsub$Condition == c1,,drop=F]
  # mdc2 <- mdsub[mdsub$Condition == c2,,drop=F]
  #
  # mdsub <- rbind(mdc1,mdc2)
  # return(mdsub)
  #
  # })
  # md <- do.call(rbind, mdx)
  #
  # #match gem order too
  # gem <- gem[,match(rownames(md), colnames(gem))]
  #
  # #cluster annot and colors
  # clust_bc <- setNames(md$seurat_clusters,
  # nm = colnames(gem)
  # )
  # col_clust <- setNames(scales::hue_pal()(length(levels(sobjsub$seurat_clusters))),
  # nm = levels(sobjsub$seurat_clusters))
  #
  # #condition annot and colors
  # cond_bc <- setNames(md$Condition,
  # nm = colnames(gem))
  #
  # #for cond colors, set up color scheme...
  # set2 <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3")
  #
  # set2 <- set2[1:length(unique(sample_metadata$Condition))]
  #
  # col_cond <- setNames(set2,
  # nm = unique(sample_metadata$Condition) )
  #
  # col_cond <- col_cond[names(col_cond) %in% c(c1,c2)]
  #
  # ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = clust_bc,
  # Condition = cond_bc,
  #
  # col = list(Cluster = col_clust,
  # Condition = col_cond),
  # show_legend = F)
  #
  # #annot for markers
  # #set genes according to ct_ordered
  # top$cluster <- factor(top$cluster, levels=unique(top$cluster))
  #
  # #condition; can just use +/- lfc
  # top$Condition <- c1
  # top[top$logFC < 0, 'Condition'] <- c2
  #
  # #match order of genes of gem with stats res
  # gem <- gem[match(top$gene, rownames(gem)),]
  #
  # #set up annot for cluster for genes
  # ct_gene <- setNames(top$cluster,
  # nm=top$gene_symbol)
  # col_gene <- col_clust
  # # col_gene <- col_gene[names(col_gene) %in% top$cluster]
  #
  # #set up annot for condition for genes
  # cond_gene <- setNames(top$Condition,
  # nm = top$gene_symbol)
  # col_genecond <- col_cond
  #
  # ha_genes <- ComplexHeatmap::rowAnnotation(Cluster = ct_gene,
  # Condition = cond_gene,
  # col = list(Cluster = col_gene, Condition = col_genecond),
  # show_annotation_name=F)
  #
  # #restrict range
  # # gem[gem>5] <- 5
  # # gem[gem<-5] <- -5
  # gem <- log1p(gem)
  #
  # pal <- circlize::colorRamp2(c(0, max(gem)), c("white", "red"))
  #
  # #actual heatmap
  # hm_DE <- ComplexHeatmap::Heatmap(gem,
  # # column_title = 'Integrated clusters',
  # column_labels = rep('', ncol(gem)),
  # row_names_gp = grid::gpar(fontsize = 5),
  # column_split = md$seurat_clusters,
  # row_split = top$cluster,
  # row_title_gp = grid::gpar(fontsize = 5),
  # row_gap = unit(0.8, "mm"),
  # column_gap = unit(0.8, "mm"),
  # border_gp = gpar(col = "black", lwd = 0.1),
  # row_title_rot = 0,
  # column_title_rot = 45,
  # column_title_gp = grid::gpar(fontsize = 7),
  # col = pal,
  # name = 'Log1p\nRISC Norm\nCounts',
  # cluster_columns = F,
  # cluster_rows = F,
  # top_annotation = ha_clust,
  # left_annotation = ha_genes,
  # use_raster = F)
  #
  # ### same thing but avg
  # # ie, avg clust 1 c1, avg clust 1 c2, avg clust 2 c1, avg clust 2 c2, etc...
  #
  # md <- sobjsub@meta.data
  # md$condclust <- paste0( md$seurat_clusters, '_c1')
  # md[md$Condition == c2,'condclust'] <- paste0( md[md$Condition == c2,'seurat_clusters'], '_c2')
  # oldlevs <- str_sort(unique(md$condclust), numeric = T)
  # levs <- oldlevs
  # levs <- gsub('c1', c1, levs)
  # levs <- gsub('c2', c2, levs)
  # md$condclust <- factor(md$condclust, levels = oldlevs)
  # md$condclust <- plyr::mapvalues(md$condclust, oldlevs, levs)
  #
  # sobjsub$condclust <- md$condclust
  # avg <- AverageExpression(sobjsub, assays = 'RISC', slot = 'data',
  # features = rownames(gem),
  # group.by = 'condclust')
  #
  # avg <- as.matrix(avg[[1]])
  #
  # ## set up avg heatmap, annots etc ##
  #
  # #column annots
  # #cluster annot and colors
  # AVGclust_bc <- setNames(str_split_fixed(colnames(avg), '_', 2)[,1],
  # nm = colnames(avg)
  # )
  # AVGcol_clust <- setNames(scales::hue_pal()(length(levels(sobjsub$seurat_clusters))),
  # nm = levels(sobjsub$seurat_clusters))
  #
  # #condition annot and colors
  # AVGcond_bc <- setNames(str_split_fixed(colnames(avg), '_', 2)[,2],
  # nm = colnames(avg))
  #
  # #for cond colors, set up color scheme...
  # set2 <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3")
  #
  # set2 <- set2[1:length(unique(sample_metadata$Condition))]
  #
  # col_cond <- setNames(set2,
  # nm = unique(sample_metadata$Condition) )
  #
  # col_cond <- col_cond[names(col_cond) %in% c(c1,c2)]
  #
  # ha_clust <- ComplexHeatmap::HeatmapAnnotation(Cluster = AVGclust_bc,
  # Condition = AVGcond_bc,
  #
  # col = list(Cluster = AVGcol_clust,
  # Condition = col_cond),
  # show_legend = F)
  #
  # #annot for markers
  # #set genes according to ct_ordered
  # # top$cluster <- factor(top$cluster, levels=unique(top$cluster))
  #
  # #condition; can just use +/- lfc
  # # top$Condition <- c1
  # # top[top$logFC < 0, 'Condition'] <- c2
  #
  # #match order of genes of gem with stats res
  # avg <- avg[match(top$gene, rownames(avg)),]
  #
  # # #set up annot for cluster for genes
  # # ct_gene <- setNames(top$cluster,
  # # nm=top$gene_symbol)
  # # col_gene <- col_clust
  # # # col_gene <- col_gene[names(col_gene) %in% top$cluster]
  # #
  # # #set up annot for condition for genes
  # # cond_gene <- setNames(top$Condition,
  # # nm = top$gene_symbol)
  # # col_genecond <- col_cond
  # #
  # # ha_genes <- ComplexHeatmap::rowAnnotation(Cluster = ct_gene,
  # # Condition = cond_gene,
  # # col = list(Cluster = col_gene, Condition = col_genecond),
  # # show_annotation_name=F)
  #
  # #restrict range
  # # gem[gem>5] <- 5
  # # gem[gem<-5] <- -5
  #
  # avg <- log1p(avg)
  #
  # pal <- circlize::colorRamp2(c(0, max(avg)), c("white", "red"))
  #
  # #make column order not alphabetic...
  # colsplit <- colnames(avg)
  # colsplit <- as.character(colsplit)
  # colsplit <- str_split_fixed(colsplit, '_', 2)[,1]
  # colsplit <- factor( colsplit,
  # levels = str_sort(unique(colsplit), numeric = T))
  #
  # #actual heatmap
  # hm_DE_AVG <- ComplexHeatmap::Heatmap(avg,
  # # column_title = 'Integrated clusters',
  # # column_labels = rep('', ncol(gem)),
  # column_names_gp = grid::gpar(fontsize = 5),
  # column_names_rot = 45,
  # row_names_gp = grid::gpar(fontsize = 5),
  # column_split = colsplit,
  # row_split = top$cluster,
  # row_title_gp = grid::gpar(fontsize = 5),
  # row_gap = unit(0.8, "mm"),
  # column_gap = unit(0.8, "mm"),
  # border_gp = gpar(col = "black", lwd = 0.1),
  # row_title_rot = 0,
  # column_title_rot = 45,
  # column_title_gp = grid::gpar(fontsize = 7),
  # col = pal,
  # name = 'Log1p\nRISC Norm\nCounts',
  # cluster_columns = F,
  # cluster_rows = F,
  # top_annotation = ha_clust,
  # left_annotation = ha_genes,
  # use_raster = F)
  #
  # ---- end of disabled summary heatmaps ----

  ### for each cluster, make a heatmap of the DEGs using single-cell data
  # seq_along() gives an empty loop when no cluster has DEGs
  cluster_de_heatmap_l <- lapply(seq_along(clustres), function(i) {

    # DEGs of this cluster, sorted from most positive to most negative logFC
    m <- clustres[[i]]
    m <- m[order(m$logFC, decreasing = TRUE), ]

    # cluster name, with and without the "cluster_" prefix
    cluster <- names(clustres)[i]
    message(cluster)
    cluster_noprefix <- gsub('cluster_', '', cluster)
    plot_title <- paste0(lab, '\n', str_to_title(cluster))

    # cells of this cluster from the two conditions, ordered c1 then c2
    md <- sobjint@meta.data
    mdclust <- md[md$seurat_clusters == cluster_noprefix, ]
    mdclust <- mdclust[mdclust$Condition %in% c(c1, c2), ]
    mdclust$Condition <- factor(mdclust$Condition, levels = c(c1, c2))
    mdclust <- mdclust[order(mdclust$Condition), ]
    clustcells <- rownames(mdclust)

    # RISC values of the DEGs for those cells
    mat <- sobjint@assays$RISC@data
    mat <- mat[match(m$gene_symbol, rownames(mat)), clustcells, drop = FALSE]

    # scale each gene across cells and restrict the range to [-5, 5]
    mat <- as.matrix(mat)
    mat <- t(scale(t(mat)))
    mat[mat > 5] <- 5
    mat[mat < -5] <- -5

    # gene names are only shown when there are at most 50 DEGs
    show_gene_names <- nrow(mat) <= 50

    hm_de_clust <- Heatmap(mat,
                           show_column_names = FALSE,
                           cluster_column_slices = FALSE, cluster_columns = FALSE,
                           cluster_row_slices = FALSE, cluster_rows = FALSE,
                           show_row_names = show_gene_names,
                           name = paste0('Scaled\nRISC\nValues'),
                           column_split = mdclust$Condition,
                           row_split = factor(sign(m$logFC), levels = c(1, -1)),
                           border_gp = gpar(col = "black", lwd = 1)
    )
    hm_de_clust <- draw_offscreen(hm_de_clust, plot_title)

    if (Pseudobulk_mode == TRUE) {

      ### just plot the per-sample values stored in the DE table
      pb_mat <- m[, colnames(m) %in% mdclust$Code, drop = FALSE]
      pb_mat <- as.matrix(pb_mat)
      pb_mat <- log1p(pb_mat)
      pb_mat <- t(scale(t(pb_mat)))

      # restrict range
      pb_mat[pb_mat > 5] <- 5
      pb_mat[pb_mat < -5] <- -5

      # order columns of heatmap by c1 vs c2
      code_comps_order <- subpmd[subpmd$Condition == c1, "Code"]
      code_comps_order <- c(code_comps_order, subpmd[subpmd$Condition == c2, "Code"])

      # make sure they are in the matrix
      code_comps_order <- code_comps_order[code_comps_order %in% colnames(pb_mat)]

      # also prepare factor for ordering of heatmap
      condition_vector_ordering <- factor(subpmd[match(code_comps_order, subpmd$Code), "Condition"], levels = c(c1, c2))

      pb_mat <- pb_mat[, code_comps_order, drop = FALSE]

      hm_de_clust_pb <- Heatmap(pb_mat,
                                show_column_names = TRUE,
                                cluster_column_slices = FALSE, cluster_columns = FALSE,
                                cluster_row_slices = FALSE, cluster_rows = FALSE,
                                show_row_names = show_gene_names,
                                name = paste0('Scaled\nPseudobulk\nValues'),
                                column_split = condition_vector_ordering,
                                row_split = factor(sign(m$logFC), levels = c(1, -1)),
                                border_gp = gpar(col = "black", lwd = 1)
      )
      hm_de_clust_pb <- draw_offscreen(hm_de_clust_pb, plot_title)
    }

    if (Pseudobulk_mode == FALSE) {

      # All cells of this cluster, ordered c1 then c2. As in the original
      # code this is not restricted to c1/c2 here; samples from other
      # conditions are excluded below through code_comps_order.
      mdclust <- md[md$seurat_clusters == cluster_noprefix, ]
      mdclust$Condition <- factor(mdclust$Condition, levels = c(c1, c2))
      mdclust <- mdclust[order(mdclust$Condition), ]
      clustcells <- rownames(mdclust)

      # unscaled RISC values of the DEGs (averaging is done before scaling)
      mat <- sobjint@assays$RISC@data
      mat <- mat[match(m$gene_symbol, rownames(mat)), clustcells, drop = FALSE]

      # order columns of heatmap by c1 vs c2
      code_comps_order <- subpmd[subpmd$Condition == c1, "Code"]
      code_comps_order <- c(code_comps_order, subpmd[subpmd$Condition == c2, "Code"])

      # make sure sample is even present in cluster
      code_comps_order <- code_comps_order[code_comps_order %in% mdclust$Code]

      # also prepare factor for ordering of heatmap
      condition_vector_ordering <- factor(subpmd[match(code_comps_order, subpmd$Code), "Condition"], levels = c(c1, c2))

      # per-sample mean of each DEG over that sample's cells in this cluster
      avg_l <- lapply(code_comps_order, function(code) {
        codecells <- rownames(mdclust[mdclust$Code == code, ])
        avgmat <- Matrix::rowMeans(mat[, codecells, drop = FALSE])
        avgmat <- matrix(avgmat)
        rownames(avgmat) <- rownames(mat); colnames(avgmat) <- code
        return(avgmat)
      })

      avgmat <- as.matrix(dplyr::bind_cols(avg_l))
      rownames(avgmat) <- rownames(mat)
      colnames(avgmat) <- code_comps_order

      # scale each gene across samples and restrict the range to [-5, 5]
      avgmat <- as.matrix(avgmat)
      avgmat <- t(scale(t(avgmat)))
      avgmat[avgmat > 5] <- 5
      avgmat[avgmat < -5] <- -5

      hm_de_clust_pb <- Heatmap(avgmat,
                                show_column_names = TRUE,
                                cluster_column_slices = FALSE, cluster_columns = FALSE,
                                cluster_row_slices = FALSE, cluster_rows = FALSE,
                                show_row_names = nrow(mat) <= 50,
                                name = paste0('Scaled\nMean\nRisc\nValues'),
                                column_split = condition_vector_ordering,
                                row_split = factor(sign(m$logFC), levels = c(1, -1)),
                                border_gp = gpar(col = "black", lwd = 1)
      )
      hm_de_clust_pb <- draw_offscreen(hm_de_clust_pb, plot_title)
    }

    return(list(hm_de_clust = hm_de_clust,
                hm_de_clust_pb = hm_de_clust_pb))
  })
  names(cluster_de_heatmap_l) <- names(clustres)

  deplotlist <- list(#hm_DE = hm_DE,
    #hm_DE_AVG = hm_DE_AVG,
    cluster_de_heatmap_l = cluster_de_heatmap_l)

  # write all heatmaps of this comparison to one PDF; the device is closed
  # even if printing a heatmap fails
  deplotfile <- paste0(outdir_DE_plots, lab, '.pdf')
  grDevices::pdf(deplotfile, height = 9, width = 9)
  tryCatch(
    {
      # print(deplotlist$hm_DE)
      # print(deplotlist$hm_DE_AVG)
      invisible(print(deplotlist$cluster_de_heatmap_l))
    },
    finally = grDevices::dev.off()
  )

  return(deplotlist)
})
names(DEplots_comps) <- comps$labels
# ---- Cross-condition pathway analysis ----
# Keeps the cluster levels, frees the large objects that are no longer needed,
# runs the pathway-analysis module on the per-cluster DE tables and saves the
# DE / pathway results as one .rds file for reproducing plots later.

### save cluster levels ###
# must happen before sobjint is removed below
saved_cluster_levels <- levels(sobjint$seurat_clusters)

pwayoutdir <- paste0(outdir_int, '/pathwayanalysis_crosscondition/')
dir.create(pwayoutdir, recursive = TRUE)

# at this point, we no longer need many of the high memory using objects
rm(list = c(
  "sobjint",
  "ap_cond_to_clust", "ap_int_celltypes", "ap_samp_to_clust", "avg", "avgl", "ctplots_int",
  "cutoffplots", "cutoffs", "d1_int", "d2_int", "d3_int", "dcond", "dcond_split", "dend", "dp_ctscores",
  "gem", "ha_clust", "ha_genes", "hm_ctscores", "hm_int", "hm_refmarkers", "hm_refmarkers_avg",
  "intmd", "labelsdf", "m_integrated_clusters", "m_ref_small", "m_reference", "md", "mdint", "nicedend",
  "sampsumplots", "summaryplots_integrated", "summaryplots_integrated_ct", "thisct_plots", "top",
  "d4_int", "ip"
))
invisible(gc(full = TRUE, reset = FALSE, verbose = FALSE))

# prep pathways
pathways <- preppathways_pathwayanalysis_crosscondition_module(
  species = species,
  outdir_int = outdir_int
)

### run main pathway analysis ###
pways_output_list <- pathwayanalysis_crosscondition_module(
  m_bycluster_crosscondition_de_comps = m_bycluster_crosscondition_de_comps,
  pathways = pathways,
  sample_metadata = sample_metadata,
  # deg.weight = "pval", #this is deprecated now
  comps = comps,
  workernum = workernum,
  outdir_int = outdir_int
)

# split the module output into its two parts
pathway_analysis_mainlist_comps <- pways_output_list$pathway_analysis_mainlist_comps
pathwaysummplots_comps <- pways_output_list$pathwaysummplots_comps

### for easily reproducing plots and etc, save them as R objects...
DE_pathways_plot_objects_list_file <- paste0(pwayoutdir, '/DE_pathways_plot_objects_list.rds')
DE_pathways_plot_objects_list <- list(
  comps = comps,
  m_bycluster_crosscondition_de_comps = m_bycluster_crosscondition_de_comps,
  pathway_analysis_mainlist_comps = pathway_analysis_mainlist_comps,
  pathwaysummplots_comps = pathwaysummplots_comps,
  crossconditionDE_padj_thres = crossconditionDE_padj_thres,
  crossconditionDE_lfc_thres = crossconditionDE_lfc_thres
)
saveRDS(DE_pathways_plot_objects_list, DE_pathways_plot_objects_list_file)
#### print out NUMDEGS, NUMDEGS THRESHOLDED, AND PWAY ANALYSIS TO HTML
# For each comparison this writes report sections with cat() / print():
#   1. a table with the number of significant DEGs per cluster
#   2. the per-cluster DEG heatmaps stored in DEplots_comps
#   3. the GSEA results: per category the summary plots, then per cluster a
#      results table and a dotplot
# Any value of Pseudobulk_mode other than TRUE is treated as the Wilcoxon
# (non-pseudobulk) mode here.

#do this for each comparison
compslen <- nrow(comps)
for (compidx in seq_len(compslen)) {

  # comparison condition levels and label, looked up by column name to agree
  # with how comps$labels is built and how DEplots_comps is named
  c1 <- comps$c1[compidx]
  c2 <- comps$c2[compidx]
  lab <- comps$labels[compidx]

  #get cross conditions res per cluster list
  m_bycluster_crosscondition_de <- m_bycluster_crosscondition_de_comps[[compidx]]

  #get pway results full list
  pathway_analysis_mainlist <- pathway_analysis_mainlist_comps[[compidx]]

  #get pway summary plots
  summplots_cats <- pathwaysummplots_comps[[compidx]]

  # get the deplots
  deplotlist <- DEplots_comps[[lab]]

  complab <- "
## %s
"
  cat(sprintf(complab, lab))

  ### number of significant DEGs per cluster ###
  # the pct.1 / pct.2 > 0.1 filter is only applied in pseudobulk mode
  apply_pct_filter <- Pseudobulk_mode == TRUE

  if (apply_pct_filter) {
    plotlab <- "
### Number of significant DEGs across conditions in each cluster
Here we check number of differentially expressed genes (DEGs) after applying some statistical thresholds:
* Adjusted P value < %s
* Log Fold Change > +/- %s
* Proportion of cells expressing gene in condition A > 0.1 if LFC is positive
* Proportion of cells expressing gene in condition B > 0.1 if LFC is negative
Differential expression compares genes in condition A (left) versus condition B (right). The left column indicates the number of genes upregulated in the left condition, while the right column indicates genes upregulated in the right condition.
Finally, the thresholds above do not affect downstream results from pathway analysis, they are just meant to count the number of DEGs. Downstream usage of DEGs can use these thresholds, or you can choose other appropriate cutoffs.
"
  } else {
    plotlab <- "
### Number of significant DEGs across conditions in each cluster
Here we check number of differentially expressed genes (DEGs) after applying some statistical thresholds:
* Adjusted P value < %s
* Log Fold Change > +/- %s
Differential expression compares genes in condition A (left) versus condition B (right). The left column indicates the number of genes upregulated in the left condition, while the right column indicates genes upregulated in the right condition.
Finally, the thresholds above do not affect downstream results from pathway analysis, they are just meant to count the number of DEGs. Downstream usage of DEGs can use these thresholds, or you can choose other appropriate cutoffs.
"
  }
  cat(sprintf(plotlab, crossconditionDE_padj_thres, crossconditionDE_lfc_thres))

  # per cluster: count of DEGs with negative logFC (higher in c2) and with
  # positive logFC (higher in c1)
  numdegs <- sapply(m_bycluster_crosscondition_de, function(m) {
    #normal fdr and padj thresholds
    m <- m[m$FDR < crossconditionDE_padj_thres, , drop = FALSE]
    m <- m[abs(m$logFC) > crossconditionDE_lfc_thres, , drop = FALSE]
    upm <- m[m$logFC > 0, , drop = FALSE]
    dnm <- m[m$logFC < 0, , drop = FALSE]
    #pct thresholds for pb_edgeR only: +FC, pct1 > 0.1; -FC, pct2 > 0.1
    if (apply_pct_filter) {
      upm <- upm[upm$pct.1 > 0.1, , drop = FALSE]
      dnm <- dnm[dnm$pct.2 > 0.1, , drop = FALSE]
    }
    m <- rbind(upm, dnm)
    try(table(factor(sign(m$logFC), levels = c(-1, 1))))
  })
  numdegs <- t(numdegs)
  colnames(numdegs) <- c(c2, c1)

  #make sure all clusters are shown
  # start from an all-zero table and fill in the clusters that have results
  saved_cluster_levels_withlab <- paste0('cluster_', saved_cluster_levels)
  numdegs_all <- data.frame(Cluster = saved_cluster_levels_withlab,
                            c1 = 0, c2 = 0)
  colnames(numdegs_all) <- c('Cluster', c1, c2)
  rownames(numdegs_all) <- numdegs_all$Cluster
  numdegs_all[rownames(numdegs), c1] <- numdegs[, c1]
  numdegs_all[rownames(numdegs), c2] <- numdegs[, c2]
  rownames(numdegs_all) <- NULL

  #rename colnames to have high
  colnames(numdegs_all) <- c('Cluster', paste0(c1, '_high'), paste0(c2, '_high'))
  print(knitr::kable(numdegs_all))

  # plotlab <- '
  # ### Heatmap of top cross-condition DEGs for each cluster
  #
  # Here we plot the top 3 differentially expressed genes across conditions from each cluster.
  # In other words, in cluster 1, we have the top 3 DEGs from %s and the top 3 DEGs from %s, then the same in cluster 2, so on.
  #
  # '
  #
  # cat(sprintf(plotlab, c1, c2))
  #
  # print(deplotlist$hm_DE)
  # ### this is kind of bugged, it prints the plot without making the title a heading...
  # cat('\n\n\n')
  #
  # plotlab <- '
  # ### Heatmap of top cross-condition DEGs for after averaging each cluster
  #
  # Here we plot the top 3 differentially expressed genes across conditions from each cluster.
  # This heatmap is similar to the one above, but rather than showing all the cells from each cluster, we show the cluster averages.
  # In other words, in cluster 1, we have the top 3 DEGs from %s and the top 3 DEGs from %s, then the same in cluster 2, so on.
  #
  # '
  #
  # cat(sprintf(plotlab, c1, c2))
  #
  # print(deplotlist$hm_DE_AVG)

  cat('\n\n\n')

  ### per-cluster heatmaps ###
  plotlab <- '
### Per-cluster heatmaps of all DEGs
To ensure the differential expression results are robust, it is helpful to inspect all DEGs by visualizing them in a heatmap. For each cluster, we compare the DEGs at both the single cell level, and either the pseudobulk level (if we used pseudobulk_edgeR) or the average RISC value level (if using wilcox).
'
  cat(plotlab)

  cluster_de_heatmap_l <- deplotlist$cluster_de_heatmap_l

  # seq_along() skips the loop when no cluster has heatmaps
  for (i in seq_along(cluster_de_heatmap_l)) {

    clust <- names(cluster_de_heatmap_l)[i]
    clust <- str_to_title(clust)
    clustplots <- cluster_de_heatmap_l[[i]]

    plotlab <- '
#### %s
'
    cat(sprintf(plotlab, clust))

    plotlab <- '
##### Differentially Expressed Gene Heatmap for all cells in this cluster
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
'
    cat(plotlab)
    print(clustplots[[1]])
    cat('\n\n\n')

    if (Pseudobulk_mode == TRUE) {
      plotlab <- '
##### Differentially Expressed Gene Heatmap plotted at pseudobulk level for replicates for this cluster
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
'
    } else {
      plotlab <- '
##### Differentially Expressed Gene Heatmap plotted after averaging RISC values for samples for this cluster
Here, we plot all significant DEGs after averaging the RISC-transformed values for samples in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
'
    }
    cat(plotlab)
    print(clustplots[[2]])
    cat('\n\n\n')
  }

  # free the heatmaps; clustplots does not exist if the loop above never ran
  rm(list = intersect(c("cluster_de_heatmap_l", "clustplots"), ls()))

  ### pathway analysis ###
  pwayDElab <- "
### Gene set enrichment analysis across conditions
Pathway analysis for the cross-condition analysis is performed for overexpressed and underexpressed genes for each cluster. This is done via Gene Set Enrichment Analysis (GSEA) [(Subramanian et al 2005)](https://www.pnas.org/doi/10.1073/pnas.0506580102).
GSEA is preferred over other pathway analysis such as fisher tests or chi-square tests because it does not require making arbitrary cutoffs to the number of DEGs and takes into account how strongly differentially expressed each gene may be. For the latter, data-driven gene-specific weight is applied. We use a standard weighting method of -log10(P-value) * sign of Log Fold Change.
The pathways we choose in pathway analysis are derived from the Molecular Signatures Database (MSIGDB) where they are sorted by categories, such as Gene Ontology (GO) Biological Process, GO Molecular Function, KEGG, Reactome, etc. These are databases that annotate genes by function or molecular pathway.
"
  cat(pwayDElab)

  # print the pathway analysis; categories with an empty result are dropped
  pathway_analysis_main_nonull <- pathway_analysis_mainlist[lengths(pathway_analysis_mainlist) != 0]

  #loop through each category:
  # loop thru each cluster
  # get up/down pathways if there are any in each cluster
  # make some adjustments to the table to adjust for printing: shorten gene list
  # print for whole category:
  # 1. summary of whole category, up pathways
  # 2. summary of whole category, dn pathways
  # 3. cluster by cluster, up tables, up dotplots
  # 4. cluster by cluster, dn tables, dn dotplots
  for (i in seq_along(pathway_analysis_main_nonull)) {

    # category name; deliberately not stored in a variable called `cat`, so
    # base::cat() is not shadowed
    pway_category <- names(pathway_analysis_main_nonull)[i]

    #get actual cluster results, and remove NAs...
    pwaycat <- pathway_analysis_main_nonull[[i]]

    # remove clusters with no pathways --> this sets them to list of lengths 0
    pwaycat <- lapply(pwaycat, function(clust) {
      clust[lengths(clust) != 0]
    })

    # remove the clusters with no pathways by removing the lists of length 0
    pwaycat <- pwaycat[lengths(pwaycat) != 0]

    ### if the category has NO PATHWAYS SIGNIFICANT in ANY cluster, just skip it
    if (length(pwaycat) == 0) {
      next
    }

    #in each category, loop thru each cluster
    # j is the cluster index; each element holds the GSEA table (gseares) and
    # the dotplot (dp) of one cluster
    clustlist <- lapply(seq_along(pwaycat), function(j) {

      #get this cluster's results, and remove empty elements
      clustcat <- pwaycat[[j]]
      clustcat <- clustcat[lengths(clustcat) != 0]
      if (length(clustcat) == 0) {
        return(NULL)
      }

      #get the plot
      dp <- clustcat$dp

      #get the table
      gseares <- clustcat$gseares

      # use the dotplot data to subset only the pathways that were plotted
      gseares_fromplot <- dp$data

      #fix up the plotted labels so we can match the two
      gseares_fromplot$Description <- gseares_fromplot$pathway
      gseares_fromplot$Description <- gsub(' ', replacement = '_', gseares_fromplot$Description)
      gseares_fromplot$Description <- gsub('\n', replacement = '_', gseares_fromplot$Description)
      gseares <- gseares[match(gseares_fromplot$Description, gseares$pathway), ]

      #keep only important columns...
      cpshow <- gseares
      rownames(cpshow) <- NULL
      cpshow <- cpshow[, c('pathway', "NES", "ES", "pval", "padj", "log2err", "size", "leadingEdge")]

      #keep only top 5 leading edge genes, followed by '...' if there are more
      # vapply() always returns a character vector, also for zero rows
      top5gene <- vapply(cpshow$leadingEdge, function(x) {
        x <- x[x != '']
        if (length(x) > 5) {
          x <- head(x, 5)
          x[6] <- '...'
        }
        paste(x, collapse = '/')
      }, character(1))
      cpshow$leadingEdge <- top5gene

      #instead of printing, output a list of them
      list(cpshow = cpshow,
           dp = dp)
    }) #close cluster lapply
    names(clustlist) <- names(pwaycat)

    #get the summaryplots
    # i is the category index
    # NOTE(review): i is a position in the NULL-filtered category list; this
    # assumes pathwaysummplots_comps[[compidx]] is ordered the same way --
    # confirm against pathwayanalysis_crosscondition_module.
    summplots_conds <- summplots_cats[[i]]

    # print for whole category:
    # 1. summary dotplot of whole category, up pathways
    # 2. summary dotplot of whole category, dn pathways
    # 3. cluster by cluster, up/dn table and dotplots

    #print category label
    catlab <- "
#### %s
"
    cat(sprintf(catlab, pway_category))

    for (condidx in seq_along(c(c1, c2))) {

      cond <- c(c1, c2)[condidx]
      summplot <- summplots_conds[[condidx]]

      if (is.null(summplot)) {
        summlab <- "
##### Summary %s, no pathways enriched
This category of pathways had no signifcantly enriched pathways in %s
"
        cat(sprintf(summlab, cond, cond))
        next
      }

      summlab <- "
##### Summaryplot: %s
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in %s. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
"
      cat(sprintf(summlab, cond, cond))
      print(summplot)
    }

    clustlab <- "
##### Per-cluster pathway results
Here we plot the pathways that are significantly enriched in the differentially expressed genes between %s and %s for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in %s relative to %s, and vice-versa.
If a cluster is missing, it indicates no pathways were significantly differentially expressed.
"
    cat(sprintf(clustlab, c1, c2, c1, c2))

    #for each cluster, print out the table and the dotplot
    for (clust in names(clustlist)) {

      clust_cond_lab <- "
###### %s
"
      cat(sprintf(clust_cond_lab, clust))

      clust_plot_tab <- clustlist[[clust]]
      cpshow <- clust_plot_tab$cpshow
      dp <- clust_plot_tab$dp
      print(knitr::kable(cpshow))
      print(dp)
    } #close cluster printing loop

  } # close category by category for loop

} # close comparisons loop
Here we check number of differentially expressed genes (DEGs) after applying some statistical thresholds:
Adjusted P value < 0.1
Absolute Log Fold Change > 0 (i.e. LFC > 0 or LFC < 0)
Proportion of cells expressing gene in condition A > 0.1 if LFC is positive
Proportion of cells expressing gene in condition B > 0.1 if LFC is negative
Differential expression compares genes in condition A (left) versus condition B (right). The left column indicates the number of genes upregulated in the left condition, while the right column indicates genes upregulated in the right condition.
Finally, the thresholds above do not affect downstream results from pathway analysis; they are only used to count the number of DEGs. Downstream usage of DEGs can use these thresholds, or you can choose other appropriate cutoffs.
| Cluster | Covid_Critical_high | Healthy_high |
|---|---|---|
| cluster_1 | 306 | 133 |
| cluster_2 | 63 | 16 |
| cluster_3 | 69 | 26 |
| cluster_4 | 389 | 192 |
| cluster_5 | 0 | 0 |
| cluster_6 | 36 | 41 |
| cluster_7 | 0 | 0 |
To ensure the differential expression results are robust, it is helpful to inspect all DEGs by visualizing them in a heatmap. For each cluster, we compare the DEGs at both the single cell level, and either the pseudobulk level (if we used pseudobulk_edgeR) or the average RISC value level (if using wilcox).
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Pathway analysis for the cross-condition analysis is performed for overexpressed and underexpressed genes for each cluster. This is done via Gene Set Enrichment Analysis (GSEA) (Subramanian et al., 2005).
GSEA is preferred over other pathway analysis methods, such as Fisher's exact test or the chi-square test, because it does not require an arbitrary cutoff on the number of DEGs and it takes into account how strongly differentially expressed each gene is. For the latter, a data-driven, gene-specific weight is applied: we use a standard weighting of -log10(P-value) * sign(Log Fold Change).
The pathways we choose in pathway analysis are derived from the Molecular Signatures Database (MSIGDB) where they are sorted by categories, such as Gene Ontology (GO) Biological Process, GO Molecular Function, KEGG, Reactome, etc. These are databases that annotate genes by function or molecular pathway.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy, and vice-versa.
If a cluster is missing, it indicates no pathways were significantly differentially expressed.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_E2F_TARGETS | 2.728753 | 0.8201163 | 0.0000000 | 0.0000000 | 1.5565442 | 192 | TK1/STMN1/MKI67/CDC20/BIRC5/… |
| HALLMARK_G2M_CHECKPOINT | 2.637151 | 0.7966623 | 0.0000000 | 0.0000000 | 1.3877461 | 172 | STMN1/MKI67/UBE2C/CDC20/BIRC5/… |
| HALLMARK_MYC_TARGETS_V1 | 2.090822 | 0.6294245 | 0.0000000 | 0.0000000 | 0.8140358 | 190 | TYMS/CDC20/MCM4/DUT/PCNA/… |
| HALLMARK_MITOTIC_SPINDLE | 2.013879 | 0.6113177 | 0.0000000 | 0.0000004 | 0.7195128 | 165 | BIRC5/TOP2A/CENPF/TPX2/PLK1/… |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.950394 | 0.6695374 | 0.0000248 | 0.0002477 | 0.5756103 | 70 | AREG/PRSS23/CISH/ZFP36/FABP5/… |
| HALLMARK_MTORC1_SIGNALING | 1.666299 | 0.5043537 | 0.0002052 | 0.0017100 | 0.5188481 | 180 | MCM4/RRM2/PLK1/DHFR/MCM2/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.783066 | 0.6326189 | 0.0002650 | 0.0018928 | 0.4984931 | 60 | AREG/LGALS1/TNFAIP3/JUN/TIMP1/… |
| HALLMARK_GLYCOLYSIS | 1.645698 | 0.5158091 | 0.0008169 | 0.0051056 | 0.4772708 | 129 | STMN1/CDK1/HMMR/NASP/AURKA/… |
| HALLMARK_SPERMATOGENESIS | 1.611055 | 0.5784368 | 0.0039929 | 0.0174638 | 0.4070179 | 54 | CDKN3/CCNB2/CDK1/NCAPH/KIF2C/… |
| HALLMARK_APOPTOSIS | 1.589836 | 0.4989284 | 0.0041913 | 0.0174638 | 0.4070179 | 127 | TOP2A/LGALS3/HMGB2/PMAIP1/BAX/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 2.322689 | 0.6313845 | 0.0000000 | 0.0000000 | 0.8986712 | 140 | KLF2/RHOB/CD69/JUN/ZBTB10/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.936725 | 0.5591517 | 0.0000070 | 0.0001753 | 0.6105269 | 95 | CD69/EMP3/KLF6/MYC/NFKBIA/… |
| HALLMARK_P53_PATHWAY | 1.738750 | 0.4725668 | 0.0000771 | 0.0012851 | 0.5384341 | 143 | JUN/BAX/PPP1R15A/TOB1/FOS/… |
| HALLMARK_APOPTOSIS | 1.742682 | 0.4860557 | 0.0002342 | 0.0029277 | 0.5188481 | 123 | RHOB/CD69/JUN/PMAIP1/IRF1/… |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | 1.827284 | 0.5843341 | 0.0008299 | 0.0082987 | 0.4772708 | 49 | JUN/STAT1/IRF1/SOCS1/IL10RB/… |
| HALLMARK_IL2_STAT5_SIGNALING | 1.600515 | 0.4456460 | 0.0013672 | 0.0097654 | 0.4550599 | 124 | RHOB/KLF6/MYC/PLIN2/IFITM3/… |
| HALLMARK_ESTROGEN_RESPONSE_EARLY | 1.624859 | 0.4931969 | 0.0051265 | 0.0284806 | 0.4070179 | 67 | MYC/AREG/TOB1/FOS/LRIG1/… |
| HALLMARK_HYPOXIA | 1.464556 | 0.4187076 | 0.0141024 | 0.0705121 | 0.3807304 | 100 | JUN/KLF6/PLIN2/PPP1R15A/FOS/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.564335 | 0.4953251 | 0.0173514 | 0.0722976 | 0.3524879 | 52 | RHOB/JUN/EMP3/AREG/TGFBI/… |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.471464 | 0.4399010 | 0.0168521 | 0.0722976 | 0.3524879 | 73 | AREG/FOS/SGK1/NXT1/LARGE1/… |
| HALLMARK_FATTY_ACID_METABOLISM | -1.397312 | -0.3773208 | 0.0240373 | 0.0924511 | 0.3524879 | 102 | ACO2/NSDHL/ECI2/RETSAT/HSP90AA1/… |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | -1.507742 | -0.3757902 | 0.0009974 | 0.0083119 | 0.4550599 | 190 | NDUFS3/ACO2/MTX2/CASP7/TOMM70/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 2.162940 | 0.6192789 | 0.0000000 | 0.0000000 | 0.8012156 | 130 | CD69/SGK1/SOCS3/NFKBIA/IFNGR2/… |
| HALLMARK_IL2_STAT5_SIGNALING | 2.039593 | 0.5811499 | 0.0000000 | 0.0000007 | 0.7337620 | 136 | PIM1/PTGER2/TNFRSF4/TNFRSF18/SOCS2/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 2.055382 | 0.6083283 | 0.0000002 | 0.0000032 | 0.6901325 | 103 | CD69/PTGER2/NFKBIA/IFNGR2/OSM/… |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | 2.085060 | 0.6931223 | 0.0000017 | 0.0000208 | 0.6435518 | 50 | PIM1/SOCS3/IFNGR2/JUN/TNFRSF1A/… |
| HALLMARK_HYPOXIA | 1.952689 | 0.5823359 | 0.0000025 | 0.0000252 | 0.6272567 | 99 | PIM1/FOS/JUN/ZFP36/LDHA/… |
| HALLMARK_UV_RESPONSE_UP | 1.814924 | 0.5412512 | 0.0000541 | 0.0004512 | 0.5573322 | 99 | NFKBIA/FOS/SIGMAR1/POLG2/JUNB/… |
| HALLMARK_ALLOGRAFT_REJECTION | 1.738773 | 0.4936802 | 0.0000857 | 0.0006122 | 0.5384341 | 139 | EIF5A/IFNGR2/IL16/CD40LG/SOCS1/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.697457 | 0.4750014 | 0.0001210 | 0.0007562 | 0.5384341 | 159 | PIM1/CD69/ARID5B/SOCS3/NFKBIA/… |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.797445 | 0.5662766 | 0.0006332 | 0.0035175 | 0.4772708 | 69 | SGK1/AREG/FOS/ZFP36/NXT1/… |
| HALLMARK_TGF_BETA_SIGNALING | 1.630331 | 0.5733072 | 0.0053376 | 0.0242618 | 0.4070179 | 38 | IFNGR2/ID3/JUNB/UBE2D3/SMURF2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_COMPLEMENT | 2.170216 | 0.6672156 | 0.0000000 | 0.0000006 | 0.7477397 | 137 | CLU/PIM1/S100A12/S100A9/CTSD/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.993342 | 0.5952694 | 0.0000004 | 0.0000090 | 0.6749629 | 173 | PIM1/FCGR1A/HLA-DRB1/NFKBIA/CXCL10/… |
| HALLMARK_APOPTOSIS | 1.929190 | 0.6000240 | 0.0000246 | 0.0004097 | 0.5756103 | 125 | CLU/GNA15/TXNIP/GADD45B/EMP1/… |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 1.837970 | 0.5585377 | 0.0001026 | 0.0012823 | 0.5384341 | 150 | MAP3K8/BCL2A1/NFKBIA/CXCL10/MARCKS/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.844172 | 0.5735815 | 0.0001305 | 0.0013048 | 0.5188481 | 125 | ADM/CD82/NFKBIA/CXCL10/GNA15/… |
| HALLMARK_CHOLESTEROL_HOMEOSTASIS | 1.851895 | 0.6407820 | 0.0003801 | 0.0027152 | 0.4984931 | 61 | CLU/FABP5/FADS2/FDPS/S100A11/… |
| HALLMARK_IL2_STAT5_SIGNALING | 1.776333 | 0.5500254 | 0.0003491 | 0.0027152 | 0.4984931 | 130 | PIM1/MAP3K8/CCND3/SLC39A8/CXCL10/… |
| HALLMARK_HYPOXIA | 1.779982 | 0.5631392 | 0.0007326 | 0.0045790 | 0.4772708 | 109 | PIM1/ADM/LDHA/SLC2A3/TGFBI/… |
| HALLMARK_COAGULATION | 1.785994 | 0.6332655 | 0.0010480 | 0.0055257 | 0.4550599 | 53 | CLU/GNB2/CTSB/SERPING1/MAFF/… |
| HALLMARK_MYOGENESIS | 1.803834 | 0.5986199 | 0.0016842 | 0.0076553 | 0.4550599 | 78 | CLU/TNNT1/GADD45B/MEF2C/IGFBP7/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.798427 | 0.5196117 | 0.0047554 | 0.0792569 | 0.4070179 | 48 | AREG/S100A9/TSPAN13/ISG20/ATP2B4/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.557464 | 0.3934974 | 0.0034506 | 0.0792569 | 0.4317077 | 128 | IFI27/IFITM3/HLA-DQA1/FCGR1A/HLA-DRB1/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.798903 | 0.5420057 | 0.0074444 | 0.0930552 | 0.4070179 | 36 | AREG/VCAN/JUN/WIPF1/BASP1 |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | -1.360880 | -0.4361828 | 0.0041952 | 0.0792569 | 0.4070179 | 180 | TCIRG1/ACO2/UQCRFS1/ATP6V1G1/NDUFA4/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy, and vice-versa.
If a cluster is missing, it indicates no pathways were significantly differentially expressed.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_MITOTIC_CELL_CYCLE_PROCESS | 2.189977 | 0.6119397 | 0.0000000 | 0.0000000 | 1.3188888 | 560 | STMN1/MKI67/UBE2C/CDC20/MCM4/… |
| GOBP_MITOTIC_NUCLEAR_DIVISION | 2.265175 | 0.6666883 | 0.0000000 | 0.0000000 | 1.0476265 | 239 | MKI67/UBE2C/CDC20/CENPF/TPX2/… |
| GOBP_ORGANELLE_FISSION | 2.164888 | 0.6197788 | 0.0000000 | 0.0000000 | 1.0476265 | 346 | MKI67/UBE2C/CDC20/TOP2A/CENPF/… |
| GOBP_SISTER_CHROMATID_SEGREGATION | 2.270348 | 0.6893656 | 0.0000000 | 0.0000000 | 0.9325952 | 167 | UBE2C/CDC20/TOP2A/CENPF/PLK1/… |
| GOBP_MITOTIC_SISTER_CHROMATID_SEGREGATION | 2.229011 | 0.6878440 | 0.0000000 | 0.0000000 | 0.8634154 | 147 | UBE2C/CDC20/CENPF/PLK1/PTTG1/… |
| GOBP_PROTEIN_DNA_COMPLEX_ASSEMBLY | 2.183707 | 0.6743030 | 0.0000000 | 0.0000000 | 0.8390889 | 148 | H3C2/CENPF/ASF1B/CENPN/CDC45/… |
| GOBP_MITOTIC_SPINDLE_ORGANIZATION | 2.195368 | 0.6955760 | 0.0000000 | 0.0000005 | 0.7881868 | 109 | STMN1/CDC20/TPX2/PLK1/KIF23/… |
| GOBP_REGULATION_OF_CHROMOSOME_SEGREGATION | 2.226644 | 0.7419976 | 0.0000000 | 0.0000043 | 0.7477397 | 73 | MKI67/UBE2C/CENPF/PLK1/ZWINT/… |
| GOBP_METAPHASE_ANAPHASE_TRANSITION_OF_CELL_CYCLE | 2.157218 | 0.7545796 | 0.0000002 | 0.0000364 | 0.6901325 | 55 | UBE2C/CENPF/PLK1/ZWINT/MAD2L1/… |
| GOBP_REGULATION_OF_CHROMOSOME_SEPARATION | 2.158176 | 0.7428544 | 0.0000002 | 0.0000501 | 0.6749629 | 61 | UBE2C/CENPF/PLK1/PTTG1/ZWINT/… |
| GOBP_PROCESS_UTILIZING_AUTOPHAGIC_MECHANISM | -1.284567 | -0.2997422 | 0.0020314 | 0.0825689 | 0.4317077 | 408 | WDR45B/C9orf72/ERCC4/SIRT1/PSEN1/… |
| GOBP_POSITIVE_REGULATION_OF_PROTEIN_EXIT_FROM_ENDOPLASMIC_RETICULUM | -1.788954 | -0.8818964 | 0.0018853 | 0.0783333 | 0.4550599 | 7 | TMEM30A/BCAP31/EDEM1 |
| GOBP_NEGATIVE_REGULATION_OF_ACTIN_NUCLEATION | -1.789769 | -0.9239111 | 0.0007261 | 0.0370781 | 0.4772708 | 6 | HIP1R/CORO1A/GMFB |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 1.941814 | 0.4773758 | 0.0000000 | 0.0000007 | 0.8390889 | 375 | IGLV7-46/IGLV3-9/IGHV3-72/IGHV3-49/IGKV1D-17/… |
| GOBP_POSITIVE_REGULATION_OF_B_CELL_ACTIVATION | 2.108893 | 0.5992717 | 0.0000000 | 0.0001095 | 0.7337620 | 107 | IGHV3-72/IGHV3-49/IGHV6-1/PELI1/IGHV7-81/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_CIRCULATING_IMMUNOGLOBULIN | 2.155604 | 0.6492334 | 0.0000001 | 0.0002414 | 0.7049757 | 75 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOBP_B_CELL_RECEPTOR_SIGNALING_PATHWAY | 2.021007 | 0.5799128 | 0.0000006 | 0.0009960 | 0.6594444 | 101 | IGHV3-72/IGHV3-49/IGHV6-1/BAX/IGHV7-81/… |
| GOBP_B_CELL_ACTIVATION | 1.844998 | 0.4695816 | 0.0000007 | 0.0009960 | 0.6594444 | 244 | IGHV3-72/IGHV3-49/KLF6/IGHV6-1/PELI1/… |
| GOBP_COMPLEMENT_ACTIVATION | 2.107365 | 0.6394987 | 0.0000011 | 0.0011963 | 0.6435518 | 70 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOBP_PHAGOCYTOSIS_RECOGNITION | 2.057115 | 0.6349051 | 0.0000050 | 0.0033870 | 0.6105269 | 65 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOBP_MEMBRANE_INVAGINATION | 1.968101 | 0.5630140 | 0.0000050 | 0.0033870 | 0.6105269 | 102 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOBP_CELL_RECOGNITION | 1.966266 | 0.5547062 | 0.0000041 | 0.0033870 | 0.6105269 | 112 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOBP_GLIAL_CELL_PROLIFERATION | 1.963143 | 0.7576787 | 0.0002560 | 0.0796927 | 0.4984931 | 21 | MYC/AREG/TSPO/PPP1CC/PLAG1/… |
| GOBP_REGULATION_OF_CARTILAGE_DEVELOPMENT | -1.903577 | -0.7564689 | 0.0002825 | 0.0821368 | 0.4984931 | 17 | SMPD3/TRPS1/SMAD3/SOX5/GLG1/… |
| GOBP_CYTOPLASMIC_TRANSLATION | -1.796263 | -0.4698042 | 0.0000408 | 0.0203316 | 0.5573322 | 135 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_RESPONSE_TO_ORGANIC_CYCLIC_COMPOUND | 1.892491 | 0.4842527 | 0.0000000 | 0.0000014 | 0.8140358 | 367 | PIM1/SGK1/PTGER2/AREG/ZFP36L1/… |
| GOBP_RECEPTOR_SIGNALING_PATHWAY_VIA_STAT | 2.033450 | 0.6192620 | 0.0000006 | 0.0003507 | 0.6594444 | 84 | SOCS3/TNFRSF18/SOCS2/OSM/TNFRSF1A/… |
| GOBP_RESPONSE_TO_CORTICOSTEROID | 2.075931 | 0.6506483 | 0.0000015 | 0.0005411 | 0.6435518 | 67 | SGK1/AREG/ZFP36L1/FOS/ZFP36/… |
| GOBP_TYROSINE_PHOSPHORYLATION_OF_STAT_PROTEIN | 2.018249 | 0.7167395 | 0.0000220 | 0.0053147 | 0.5756103 | 34 | SOCS3/TNFRSF18/OSM/TNFRSF1A/SOCS1/… |
| GOBP_RESPONSE_TO_CAMP | 1.988106 | 0.7060350 | 0.0000484 | 0.0097861 | 0.5573322 | 34 | AREG/ZFP36L1/FDX1/ITPR2/RAP1A/… |
| GOBP_RESPONSE_TO_PURINE_CONTAINING_COMPOUND | 1.952358 | 0.6489001 | 0.0000749 | 0.0137962 | 0.5384341 | 48 | AREG/ZFP36L1/FOS/TRPM2/FDX1/… |
| GOBP_STEM_CELL_PROLIFERATION | 1.910631 | 0.7318622 | 0.0001598 | 0.0225343 | 0.5188481 | 25 | PIM1/ZFP36L1/N4BP2L2/LTBP3/EIF2AK2/… |
| GOBP_KERATINOCYTE_PROLIFERATION | 1.910881 | 0.8309192 | 0.0002265 | 0.0268445 | 0.5188481 | 13 | AREG/ZFP36L1/ZFP36/KLF9/BCL11B/… |
| GOBP_REGULATION_OF_KERATINOCYTE_PROLIFERATION | 1.901463 | 0.8636862 | 0.0002377 | 0.0273346 | 0.5188481 | 11 | AREG/ZFP36L1/ZFP36/KLF9/BCL11B/… |
| GOBP_REGULATION_OF_STEM_CELL_PROLIFERATION | 1.932395 | 0.7924327 | 0.0004497 | 0.0409968 | 0.4984931 | 17 | PIM1/ZFP36L1/N4BP2L2/LTBP3/EIF2AK2/… |
| GOBP_ALCOHOL_CATABOLIC_PROCESS | -1.810971 | -0.6549556 | 0.0019646 | 0.0926203 | 0.4317077 | 22 | ALDH3B1/PTEN/GK/BPNT2/SYNJ1/… |
| GOBP_CEREBELLAR_CORTEX_DEVELOPMENT | -1.847074 | -0.6464517 | 0.0017256 | 0.0837711 | 0.4550599 | 24 | TTC21B/WNT7A/CLP1/HERC1/RORA/… |
| GOBP_EXCITATORY_SYNAPSE_ASSEMBLY | -1.825969 | -0.8700692 | 0.0013714 | 0.0770854 | 0.4550599 | 7 | PTEN/NPTN/WNT7A |
| GOBP_MITOCHONDRION_LOCALIZATION | -1.839853 | -0.5959025 | 0.0012277 | 0.0758517 | 0.4550599 | 33 | KAT2A/FEZ1/SLC4A5/LRPPRC/MFN1/… |
| GOBP_HINDBRAIN_MORPHOGENESIS | -1.880683 | -0.6900828 | 0.0010563 | 0.0695336 | 0.4550599 | 20 | TTC21B/WNT7A/HERC1/RORA/DAB1/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_ANTIGEN_VIA_MHC_CLASS_IB | -1.725770 | -0.9280066 | 0.0007618 | 0.0575309 | 0.4772708 | 5 | TAP2/HLA-G |
| GOBP_N_TERMINAL_PROTEIN_AMINO_ACID_MODIFICATION | -1.925206 | -0.6737969 | 0.0006795 | 0.0529149 | 0.4772708 | 24 | HHAT/METAP1D/NAA30/NAA40/NAA15/… |
| GOBP_AXONEMAL_DYNEIN_COMPLEX_ASSEMBLY | -1.854941 | -0.8636938 | 0.0004593 | 0.0413711 | 0.4984931 | 8 | DNAAF10/DNAI2/DNAAF5 |
| GOBP_LENS_DEVELOPMENT_IN_CAMERA_TYPE_EYE | -1.945022 | -0.6627393 | 0.0002943 | 0.0305581 | 0.4984931 | 27 | HIPK1/CRYBG3/SPRY1/WNT7A/TGFBR1/… |
| GOBP_INOSITOL_PHOSPHATE_METABOLIC_PROCESS | -1.988218 | -0.6906353 | 0.0002464 | 0.0279149 | 0.4984931 | 25 | PPIP5K1/PTEN/BPNT2/SYNJ1/INPP4A |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_HUMORAL_IMMUNE_RESPONSE | 2.163674 | 0.6594244 | 0.0000001 | 0.0000942 | 0.7049757 | 130 | CLU/RNASE2/S100A12/HLA-DRB1/S100A9/… |
| GOBP_ANTIMICROBIAL_HUMORAL_RESPONSE | 2.153263 | 0.7718184 | 0.0000114 | 0.0040128 | 0.5933255 | 46 | RNASE2/S100A12/S100A9/RNASE3/CXCL10/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.096229 | 0.8575825 | 0.0000522 | 0.0091739 | 0.5573322 | 23 | HLA-DRB1/HLA-DPB1/CTSD/HLA-DPA1/HLA-DRA/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN | 2.114744 | 0.8018881 | 0.0001064 | 0.0153056 | 0.5384341 | 32 | HLA-DRB1/HLA-DPB1/CTSD/HLA-DPA1/HLA-DRA/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_ANTIGEN | 2.097620 | 0.7686913 | 0.0001359 | 0.0183338 | 0.5188481 | 39 | HLA-DRB1/HLA-DPB1/CTSD/HLA-DPA1/HLA-DRA/… |
| GOBP_DEFENSE_RESPONSE_TO_FUNGUS | 2.079787 | 0.8436805 | 0.0001465 | 0.0183338 | 0.5188481 | 24 | S100A8/S100A12/S100A9/MPO/CX3CR1/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.065245 | 0.8114667 | 0.0001416 | 0.0183338 | 0.5188481 | 27 | HLA-DRB1/HLA-DPB1/CTSD/HLA-DPA1/HLA-DRA/… |
| GOBP_IMMUNOGLOBULIN_PRODUCTION_INVOLVED_IN_IMMUNOGLOBULIN_MEDIATED_IMMUNE_RESPONSE | 2.035406 | 0.6981273 | 0.0001513 | 0.0183338 | 0.5188481 | 57 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOBP_DEFENSE_RESPONSE_TO_GRAM_NEGATIVE_BACTERIUM | 2.036333 | 0.7868104 | 0.0002425 | 0.0266826 | 0.5188481 | 29 | RNASE2/RNASE3/H2BC11/CD4/MPEG1/… |
| GOBP_PEPTIDE_ANTIGEN_ASSEMBLY_WITH_MHC_PROTEIN_COMPLEX | 2.066526 | 0.8862317 | 0.0004514 | 0.0438580 | 0.4984931 | 17 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOBP_MRNA_PROCESSING | -1.390379 | -0.3320768 | 0.0007022 | 0.0571538 | 0.4772708 | 402 | TENT4A/SRSF4/RRP1B/CDK11A/DHX15/… |
| GOBP_RNA_PROCESSING | -1.327048 | -0.3035917 | 0.0000869 | 0.0134643 | 0.5384341 | 781 | TENT4A/SRSF4/PTCD1/METTL25B/RRP1B/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_PEPTIDE_ANTIGEN_ASSEMBLY_WITH_MHC_CLASS_II_PROTEIN_COMPLEX | 2.327248 | 0.8866218 | 0.0000186 | 0.0229055 | 0.5756103 | 14 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOBP_PEPTIDE_ANTIGEN_ASSEMBLY_WITH_MHC_PROTEIN_COMPLEX | 2.307622 | 0.8416140 | 0.0000307 | 0.0289097 | 0.5573322 | 18 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOBP_DEFENSE_RESPONSE_TO_FUNGUS | 2.280060 | 0.8686447 | 0.0000660 | 0.0430338 | 0.5384341 | 14 | S100A8/S100A9/S100A12/CX3CR1 |
| GOBP_NEUTROPHIL_CHEMOTAXIS | 2.181337 | 0.7034697 | 0.0000832 | 0.0458773 | 0.5384341 | 33 | S100A8/S100A9/S100A12/CD74 |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.221043 | 0.7725118 | 0.0001179 | 0.0497257 | 0.5384341 | 22 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_ANTIGEN | 2.096504 | 0.6612864 | 0.0001518 | 0.0604769 | 0.5188481 | 37 | HLA-DPB1/HLA-DQA1/CD1C/HLA-DRB1/HLA-DPA1/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN | 2.129781 | 0.6980586 | 0.0002221 | 0.0725941 | 0.5188481 | 30 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.147929 | 0.7386930 | 0.0002888 | 0.0767070 | 0.4984931 | 24 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOBP_RESPONSE_TO_FUNGUS | 2.197113 | 0.7588724 | 0.0003386 | 0.0793038 | 0.4984931 | 23 | S100A8/S100A9/S100A12/CX3CR1 |
| GOBP_REGULATION_OF_VIRAL_ENTRY_INTO_HOST_CELL | 2.112268 | 0.7346781 | 0.0005561 | 0.0972755 | 0.4772708 | 22 | IFITM3/HLA-DRB1/IFITM1/CD74/LY6E/… |
| GOBP_RESPONSE_TO_NERVE_GROWTH_FACTOR | -1.742844 | -0.7272221 | 0.0004195 | 0.0869578 | 0.4984931 | 20 | ACAP2/EIF4A3/ARF6/KIDINS220/APP/… |
| GOBP_TOOTH_MINERALIZATION | -1.560006 | -0.9840261 | 0.0003538 | 0.0793038 | 0.4984931 | 3 | TCIRG1 |
| GOBP_VITAMIN_D_RECEPTOR_SIGNALING_PATHWAY | -1.626882 | -0.9634434 | 0.0002885 | 0.0767070 | 0.4984931 | 4 | RXRA/SNW1 |
| GOBP_REGULATION_OF_RESPONSE_TO_EXTRACELLULAR_STIMULUS | -1.686575 | -0.9469819 | 0.0002329 | 0.0725941 | 0.5188481 | 5 | RXRA/SNW1 |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy, and vice-versa.
If a cluster is missing, it indicates no pathways were significantly differentially expressed.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_CYTOSKELETAL_MOTOR_ACTIVITY | 1.986712 | 0.7037084 | 0.0000251 | 0.0167721 | 0.5756103 | 51 | MYL6B/KIF23/KIFC1/CENPE/KIF2C/… |
| GOMF_MICROTUBULE_MOTOR_ACTIVITY | 1.974092 | 0.7303336 | 0.0000292 | 0.0167721 | 0.5756103 | 38 | KIF23/KIFC1/CENPE/KIF2C/KIF15/… |
| GOMF_MODIFIED_AMINO_ACID_BINDING | 1.951044 | 0.7127251 | 0.0000401 | 0.0167721 | 0.5573322 | 42 | TYMS/DHFR/UROS/SYTL2/PTGES2/… |
| GOMF_SINGLE_STRANDED_DNA_BINDING | 1.832294 | 0.5844342 | 0.0000303 | 0.0167721 | 0.5756103 | 110 | MCM4/CDC45/MCM7/MCM2/RAD51/… |
| GOMF_CYCLIN_DEPENDENT_PROTEIN_SERINE_THREONINE_KINASE_REGULATOR_ACTIVITY | 1.944380 | 0.7381697 | 0.0002052 | 0.0687032 | 0.5188481 | 33 | CKS2/CCNB2/CCNA2/CCNB1/CDKN1B/… |
| GOMF_CYSTEINE_TYPE_ENDOPEPTIDASE_INHIBITOR_ACTIVITY | 1.900817 | 0.7730034 | 0.0005785 | 0.0811981 | 0.4772708 | 24 | BIRC5/PTTG1/TNFSF14/PRDX3/XIAP/… |
| GOMF_TRANSLATION_REPRESSOR_ACTIVITY_MRNA_REGULATORY_ELEMENT_BINDING | 1.865708 | 0.8725790 | 0.0004862 | 0.0811981 | 0.4984931 | 12 | TYMS/DHFR/SHMT2 |
| GOMF_DNA_REPLICATION_ORIGIN_BINDING | 1.826807 | 0.8264052 | 0.0005821 | 0.0811981 | 0.4772708 | 15 | CDC45/MCM2/MCM10/MCM5/HSPD1/… |
| GOMF_SINGLE_STRANDED_DNA_HELICASE_ACTIVITY | 1.854026 | 0.7590126 | 0.0011215 | 0.0938730 | 0.4550599 | 23 | MCM4/MCM7/MCM2/RAD51/MCM5/… |
| GOMF_TRANSLATION_REPRESSOR_ACTIVITY | 1.845158 | 0.7867681 | 0.0012523 | 0.0998226 | 0.4550599 | 19 | TYMS/DHFR/SHMT2 |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -1.498186 | -0.3895763 | 0.0009113 | 0.0897317 | 0.4772708 | 149 | RPS4X/MRPL2/RPL11/RPL10A/RPS23/… |
| GOMF_CATALYTIC_ACTIVITY_ACTING_ON_RNA | -1.382495 | -0.3248788 | 0.0006476 | 0.0833946 | 0.4772708 | 330 | EXOG/CNOT8/DDX46/FARSB/APEX1/… |
| GOMF_TELOMERASE_INHIBITOR_ACTIVITY | -1.591999 | -0.9807853 | 0.0004116 | 0.0811981 | 0.4984931 | 3 | ERCC4/PIF1 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_IMMUNOGLOBULIN_RECEPTOR_BINDING | 2.223378 | 0.6808708 | 3.00e-07 | 0.0004202 | 0.6749629 | 61 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOMF_ANTIGEN_BINDING | 1.865352 | 0.5144477 | 1.83e-05 | 0.0101870 | 0.5756103 | 128 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -1.979670 | -0.5056111 | 1.00e-06 | 0.0008148 | 0.6435518 | 150 | RPL41/RPS15A/RPS16/RPL37A/MRPS23/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_SIGNALING_RECEPTOR_REGULATOR_ACTIVITY | 2.032457 | 0.5963079 | 0.0000007 | 0.0005489 | 0.6594444 | 100 | AREG/IL16/OSM/IL32/CD40LG/… |
| GOMF_TUMOR_NECROSIS_FACTOR_ACTIVATED_RECEPTOR_ACTIVITY | 1.718074 | 0.9968417 | 0.0000004 | 0.0005489 | 0.6749629 | 4 | TNFRSF4/TNFRSF18 |
| GOMF_CYTOKINE_ACTIVITY | 2.161221 | 0.7274334 | 0.0000022 | 0.0012280 | 0.6272567 | 42 | IL16/OSM/IL32/CD40LG/GREM2/… |
| GOMF_CATALYTIC_ACTIVITY_ACTING_ON_A_NUCLEIC_ACID | -1.364325 | -0.2998334 | 0.0004322 | 0.0902781 | 0.4984931 | 493 | DHX8/ALKBH1/POLR1E/AGO1/FTO/… |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -1.587901 | -0.4017302 | 0.0002475 | 0.0590874 | 0.4984931 | 149 | RPLP2/RPS7/RPL35/MRPS33/MRPS14/… |
| GOMF_INOSITOL_HEXAKISPHOSPHATE_KINASE_ACTIVITY | -1.769559 | -0.9491201 | 0.0001786 | 0.0497426 | 0.5188481 | 5 | PPIP5K1 |
| GOMF_1_PHOSPHATIDYLINOSITOL_4_KINASE_ACTIVITY | -1.607687 | -0.9900341 | 0.0001210 | 0.0404335 | 0.5384341 | 3 | PI4KA/PI4KB |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_MHC_PROTEIN_COMPLEX_BINDING | 2.048490 | 0.7798483 | 0.0001983 | 0.0349329 | 0.5188481 | 30 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOMF_MHC_CLASS_II_PROTEIN_COMPLEX_BINDING | 2.041239 | 0.8134498 | 0.0001137 | 0.0349329 | 0.5384341 | 24 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOMF_ENDONUCLEASE_ACTIVITY_ACTIVE_WITH_EITHER_RIBO_OR_DEOXYRIBONUCLEIC_ACIDS_AND_PRODUCING_3_PHOSPHOMONOESTERS | 1.989349 | 0.9231102 | 0.0000922 | 0.0349329 | 0.5384341 | 12 | RNASE2/RNASE1/RNASE4 |
| GOMF_CALCIUM_ION_BINDING | 1.763735 | 0.5082036 | 0.0000783 | 0.0349329 | 0.5384341 | 219 | S100A8/S100A9/DYSF/ASPH/NOTCH2/… |
| GOMF_PEPTIDE_BINDING | 1.742342 | 0.5194872 | 0.0001907 | 0.0349329 | 0.5188481 | 164 | CLU/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| GOMF_MISFOLDED_PROTEIN_BINDING | 1.988678 | 0.7831213 | 0.0002979 | 0.0453585 | 0.4984931 | 25 | CLU/SDF2L1/HSPA5/DNAJB11/DNAJC10 |
| GOMF_SIGNALING_RECEPTOR_REGULATOR_ACTIVITY | 1.831525 | 0.5684759 | 0.0006799 | 0.0875983 | 0.4772708 | 111 | RETN/ADM/CXCL10/IL1B/CXCL8/… |
| GOMF_IMMUNE_RECEPTOR_ACTIVITY | 1.804870 | 0.6002018 | 0.0008141 | 0.0880927 | 0.4772708 | 70 | FCGR1A/HLA-DRB1/HLA-DPA1/HLA-DRA/CX3CR1/… |
| GOMF_PEPTIDE_ANTIGEN_BINDING | 1.943032 | 0.7944813 | 0.0009100 | 0.0896624 | 0.4772708 | 21 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DQB1/… |
| GOMF_ELECTRON_TRANSFER_ACTIVITY | 1.791593 | 0.5657850 | 0.0010685 | 0.0994256 | 0.4550599 | 100 | ASPH/MT-ND6/CYC1/NDUFB9/ETFB/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_RAGE_RECEPTOR_BINDING | 2.112261 | 0.9811787 | 0.0000035 | 0.0018163 | 0.6272567 | 6 | S100A8/S100A12/HMGB2 |
| GOMF_MHC_CLASS_II_RECEPTOR_ACTIVITY | 2.263292 | 0.9449148 | 0.0000056 | 0.0021814 | 0.6105269 | 9 | HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/HLA-DQB1/… |
| GOMF_MHC_CLASS_II_PROTEIN_COMPLEX_BINDING | 2.387379 | 0.8256735 | 0.0000119 | 0.0037266 | 0.5933255 | 23 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOMF_MHC_PROTEIN_COMPLEX_BINDING | 2.317274 | 0.7720742 | 0.0000212 | 0.0055387 | 0.5756103 | 27 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOMF_IMMUNE_RECEPTOR_ACTIVITY | 2.172245 | 0.6511305 | 0.0000290 | 0.0064749 | 0.5756103 | 48 | HLA-DQA1/FCGR1A/IFNGR2/HLA-DRB1/HLA-DPA1/… |
| GOMF_FATTY_ACID_BINDING | 2.353652 | 0.8792839 | 0.0000439 | 0.0085849 | 0.5573322 | 15 | S100A8/S100A9/PTGDS |
| GOMF_TOLL_LIKE_RECEPTOR_BINDING | 2.211241 | 0.8792482 | 0.0000511 | 0.0088869 | 0.5573322 | 11 | S100A8/S100A9/TLR1 |
| GOMF_ANTIGEN_BINDING | 2.131335 | 0.6375734 | 0.0001354 | 0.0201226 | 0.5188481 | 47 | HLA-DPB1/HLA-DQA1/CD1C/HLA-DRB1/HLA-DPA1/… |
| GOMF_LIPOPEPTIDE_BINDING | 2.093977 | 0.9369010 | 0.0001414 | 0.0201226 | 0.5188481 | 7 | CD1C/CD14/CD1D/CD1E |
| GOMF_PEPTIDE_ANTIGEN_BINDING | 2.084922 | 0.7721010 | 0.0004730 | 0.0569361 | 0.4984931 | 16 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOMF_PROTON_TRANSMEMBRANE_TRANSPORTER_ACTIVITY | -1.589458 | -0.5593554 | 0.0006385 | 0.0713799 | 0.4772708 | 65 | TCIRG1/UQCRFS1/ATP6V1G1/UQCR10/DMAC2L/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy; a negative NES indicates the pathway is enriched in Healthy relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_CHROMOSOMAL_REGION | 2.123823 | 0.6151169 | 0.0000000 | 0.0000000 | 0.9653278 | 295 | BIRC5/CENPM/MCM4/TOP2A/CENPF/… |
| GOCC_CONDENSED_CHROMOSOME | 2.234498 | 0.6674972 | 0.0000000 | 0.0000000 | 0.9545416 | 199 | MKI67/BIRC5/CENPM/TOP2A/CENPF/… |
| GOCC_NUCLEAR_CHROMOSOME | 2.139723 | 0.6447094 | 0.0000000 | 0.0000000 | 0.8390889 | 180 | BIRC5/MCM4/H3C2/TOP2A/PLK1/… |
| GOCC_CONDENSED_CHROMOSOME_CENTROMERIC_REGION | 2.147046 | 0.6638263 | 0.0000000 | 0.0000001 | 0.8140358 | 143 | BIRC5/CENPM/CENPF/CENPU/PLK1/… |
| GOCC_CHROMOSOME_CENTROMERIC_REGION | 2.036108 | 0.6205074 | 0.0000000 | 0.0000036 | 0.7337620 | 161 | BIRC5/CENPM/CENPF/CENPU/PLK1/… |
| GOCC_MITOTIC_SPINDLE | 1.954834 | 0.6148481 | 0.0000008 | 0.0000761 | 0.6594444 | 124 | TPX2/PLK1/CKAP2L/CDK1/MAD2L1/… |
| GOCC_DNA_PACKAGING_COMPLEX | 2.007591 | 0.6538712 | 0.0000015 | 0.0001256 | 0.6435518 | 92 | H3C2/H2AZ1/H4C3/H1-2/NCAPG/… |
| GOCC_SPINDLE_POLE | 1.933980 | 0.6001730 | 0.0000021 | 0.0001566 | 0.6272567 | 137 | CDC20/CENPF/TPX2/PLK1/CKAP2L/… |
| GOCC_CYCLIN_DEPENDENT_PROTEIN_KINASE_HOLOENZYME_COMPLEX | 2.005038 | 0.7465879 | 0.0000299 | 0.0013370 | 0.5756103 | 39 | PCNA/CKS2/CCNB2/CDK1/CCNA2/… |
| GOCC_T_CELL_RECEPTOR_COMPLEX | 1.966689 | 0.6708984 | 0.0000287 | 0.0013370 | 0.5756103 | 67 | TRAV38-2DV8/TRGV5/TRAV1-1/TRBV20-1/TRBV7-9/… |
| GOCC_RIBOSOMAL_SUBUNIT | -1.559063 | -0.3980319 | 0.0006768 | 0.0190078 | 0.4772708 | 161 | HBA1/RPS4X/MRPL2/RPL11/RPL10A/… |
| GOCC_AIM2_INFLAMMASOME_COMPLEX | -1.657264 | -0.9593314 | 0.0005532 | 0.0163421 | 0.4772708 | 4 | CASP4 |
| GOCC_CYTOSOLIC_RIBOSOME | -1.715178 | -0.4735702 | 0.0004986 | 0.0158102 | 0.4772708 | 90 | HBA1/RPS4X/RPL11/RPL10A/RPS23/… |
| GOCC_RIBOSOME | -1.545687 | -0.3855265 | 0.0002511 | 0.0094941 | 0.4984931 | 192 | HBA1/RPS4X/MRPL2/RPL11/APEX1/… |
| GOCC_POLYSOME | -1.841048 | -0.5500895 | 0.0002322 | 0.0091308 | 0.5188481 | 58 | RPS4X/RPL11/MCRS1/RPL10A/RPS23/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_IMMUNOGLOBULIN_COMPLEX | 2.488389 | 0.6890838 | 0.0000000 | 0.0000000 | 0.9865463 | 124 | IGLV7-46/IGLV3-9/IGHV3-72/IGHV3-49/IGLV2-18/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX_CIRCULATING | 2.234906 | 0.6869169 | 0.0000003 | 0.0001321 | 0.6749629 | 61 | IGHV3-72/IGHV3-49/IGHV6-1/IGHV7-81/IGHV2-70/… |
| GOCC_TRANSCRIPTION_FACTOR_AP_1_COMPLEX | 1.788564 | 0.9770407 | 0.0000151 | 0.0024897 | 0.5933255 | 5 | JUN/JUND/FOS/JUNB |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 1.636280 | 0.4250024 | 0.0001710 | 0.0188108 | 0.5188481 | 211 | CD69/IGHV3-72/IGHV3-49/IGHV6-1/S1PR1/… |
| GOCC_CELL_SURFACE | 1.425334 | 0.3504498 | 0.0007571 | 0.0681356 | 0.4772708 | 362 | CD69/IGHV3-72/IGHV3-49/IGHV6-1/S1PR1/… |
| GOCC_MEMBRANE_PROTEIN_COMPLEX | -1.349865 | -0.2978227 | 0.0004103 | 0.0406191 | 0.4984931 | 662 | NDUFS3/LIN7C/DNAJC11/HLA-DQA1/NCF4/… |
| GOCC_RIBOSOME | -1.708962 | -0.4277936 | 0.0000306 | 0.0037919 | 0.5573322 | 192 | RPL41/MRPL45/MRPL38/RPS15A/RPS16/… |
| GOCC_CYTOSOLIC_RIBOSOME | -1.910424 | -0.5256877 | 0.0000258 | 0.0036451 | 0.5756103 | 91 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| GOCC_LARGE_RIBOSOMAL_SUBUNIT | -1.948565 | -0.5290340 | 0.0000053 | 0.0011309 | 0.6105269 | 104 | RPL41/MRPL45/MRPL38/RPL29/UBA52/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -2.069381 | -0.6357955 | 0.0000057 | 0.0011309 | 0.6105269 | 50 | RPL41/RPL37A/RPL29/UBA52/RPL34/… |
| GOCC_RIBOSOMAL_SUBUNIT | -1.916937 | -0.4893259 | 0.0000009 | 0.0002953 | 0.6594444 | 161 | RPL41/MRPL45/MRPL38/RPS15A/RPS16/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_CELL_SURFACE | 1.735673 | 0.4506186 | 0.0000002 | 0.0002110 | 0.6901325 | 357 | CD69/TNFRSF4/AREG/TNFRSF18/FLOT1/… |
| GOCC_MEMBRANE_MICRODOMAIN | 1.805244 | 0.5081181 | 0.0000083 | 0.0040672 | 0.5933255 | 154 | FLOT1/TNFRSF1A/HMOX1/SLC25A5/ADTRP/… |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 1.711477 | 0.4659494 | 0.0000205 | 0.0067246 | 0.5756103 | 212 | CD69/TNFRSF4/TNFRSF18/FLOT1/LAG3/… |
| GOCC_T_CELL_RECEPTOR_COMPLEX | 1.753329 | 0.5217590 | 0.0001415 | 0.0278002 | 0.5188481 | 98 | TRBV6-6/TRBV7-7/TRGV2/TRAV13-1/TRBV11-1/… |
| GOCC_TRANSCRIPTION_FACTOR_AP_1_COMPLEX | 1.686755 | 0.9375073 | 0.0004197 | 0.0515227 | 0.4984931 | 5 | FOS/JUN/JUNB |
| GOCC_CORTICAL_CYTOSKELETON | 1.763539 | 0.5885794 | 0.0006032 | 0.0658159 | 0.4772708 | 49 | CLDN5/FLOT1/GYPC/MYADM/COTL1/… |
| GOCC_TERTIARY_GRANULE_MEMBRANE | 1.791363 | 0.6048151 | 0.0009715 | 0.0954010 | 0.4772708 | 46 | CEACAM1/CYBA/TRPM2/SLC2A3/FCER1G/… |
| GOCC_AMPA_GLUTAMATE_RECEPTOR_COMPLEX | 1.651277 | 0.9177881 | 0.0013888 | 0.0956082 | 0.4550599 | 5 | ABHD12/OLFM2/DLG3 |
| GOCC_ANNULATE_LAMELLAE | 1.648001 | 0.9159676 | 0.0015081 | 0.0956082 | 0.4550599 | 5 | EIF5A/XPO1/TNPO3 |
| GOCC_CORTICAL_ACTIN_CYTOSKELETON | 1.723687 | 0.6035615 | 0.0016765 | 0.0968414 | 0.4550599 | 37 | CLDN5/FLOT1/MYADM/COTL1/CAPN2/… |
| GOCC_ENDOPLASMIC_RETICULUM_TUBULAR_NETWORK_MEMBRANE | -1.633336 | -0.9499817 | 0.0015578 | 0.0956082 | 0.4550599 | 4 | ATL3/LNPK |
| GOCC_ALPHA_DNA_POLYMERASE_PRIMASE_COMPLEX | -1.640033 | -0.9538771 | 0.0013196 | 0.0956082 | 0.4550599 | 4 | POLA1/PRIM2 |
| GOCC_CYTOSOLIC_RIBOSOME | -1.671907 | -0.4522215 | 0.0012793 | 0.0956082 | 0.4550599 | 88 | RPLP2/RPS7/RPL35/RPL6/RPL23A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_VESICLE_LUMEN | 2.078219 | 0.5956180 | 0.0000000 | 0.0000010 | 0.7614608 | 235 | CLU/RETN/HP/RNASE2/S100A8/… |
| GOCC_EXTERNAL_ENCAPSULATING_STRUCTURE | 2.173879 | 0.6683608 | 0.0000003 | 0.0000567 | 0.6749629 | 117 | CLU/S100A8/S100A9/CTSD/LGALS1/… |
| GOCC_VACUOLAR_LUMEN | 2.116920 | 0.6518459 | 0.0000013 | 0.0002170 | 0.6435518 | 115 | RETN/RNASE2/PLAC8/CTSD/MPO/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE_MEMBRANE | 2.075834 | 0.7476886 | 0.0000235 | 0.0023517 | 0.5756103 | 41 | FCGR1A/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| GOCC_COLLAGEN_CONTAINING_EXTRACELLULAR_MATRIX | 2.036634 | 0.6476584 | 0.0000216 | 0.0023517 | 0.5756103 | 95 | S100A8/S100A9/CTSD/LGALS1/FGL2/… |
| GOCC_MHC_CLASS_II_PROTEIN_COMPLEX | 1.984915 | 0.8989870 | 0.0000564 | 0.0046474 | 0.5573322 | 14 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOCC_BLOOD_MICROPARTICLE | 1.983022 | 0.6843357 | 0.0001224 | 0.0093110 | 0.5188481 | 54 | CLU/HP/PFN1/IGLV3-21/ACTB/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE | 1.977163 | 0.6870066 | 0.0002900 | 0.0179254 | 0.4984931 | 52 | FCGR1A/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| GOCC_LUMENAL_SIDE_OF_MEMBRANE | 1.978420 | 0.7665233 | 0.0003744 | 0.0217799 | 0.4984931 | 28 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DQB1/… |
| GOCC_MHC_PROTEIN_COMPLEX | 1.985914 | 0.8095036 | 0.0005036 | 0.0262136 | 0.4772708 | 22 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| GOCC_STEREOCILIUM_BUNDLE | -2.048812 | -0.7930946 | 0.0000304 | 0.0027319 | 0.5756103 | 20 | MYO1C/FCHSD2/TRIOBP/ELMOD3/PAFAH1B1 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_CYTOSOLIC_RIBOSOME | 2.552043 | 0.6868500 | 0.0000000 | 0.0000000 | 0.8634154 | 88 | RPS3A/RPS15A/EIF2A/RPL4/RPL8/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | 2.435980 | 0.7259745 | 0.0000002 | 0.0000822 | 0.6901325 | 48 | RPL4/RPL10/RPL8/RPL34/RPL26/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE_MEMBRANE | 2.491602 | 0.8109419 | 0.0000003 | 0.0000862 | 0.6749629 | 30 | AREG/HLA-DPB1/HLA-DQA1/FCGR1A/HLA-DRB1/… |
| GOCC_CLATHRIN_COATED_VESICLE_MEMBRANE | 2.414883 | 0.7280867 | 0.0000015 | 0.0003474 | 0.6435518 | 45 | AREG/HLA-DPB1/HLA-DQA1/FCGR1A/HLA-DRB1/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE | 2.414769 | 0.7491360 | 0.0000026 | 0.0004912 | 0.6272567 | 37 | AREG/HLA-DPB1/HLA-DQA1/FCGR1A/HLA-DRB1/… |
| GOCC_MHC_CLASS_II_PROTEIN_COMPLEX | 2.389169 | 0.8919006 | 0.0000128 | 0.0015205 | 0.5933255 | 15 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOCC_MHC_PROTEIN_COMPLEX | 2.181840 | 0.7628724 | 0.0002100 | 0.0132288 | 0.5188481 | 21 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOCC_CYTOSOLIC_SMALL_RIBOSOMAL_SUBUNIT | 2.169121 | 0.6713940 | 0.0001849 | 0.0132288 | 0.5188481 | 38 | RPS3A/RPS15A/EIF2A/RPS27A/ISG15/… |
| GOCC_ER_TO_GOLGI_TRANSPORT_VESICLE_MEMBRANE | 2.160740 | 0.6572765 | 0.0001811 | 0.0132288 | 0.5188481 | 42 | AREG/HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/… |
| GOCC_LUMENAL_SIDE_OF_MEMBRANE | 2.144702 | 0.7323909 | 0.0009136 | 0.0435769 | 0.4772708 | 24 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| GOCC_INTRINSIC_COMPONENT_OF_POSTSYNAPTIC_MEMBRANE | -1.677693 | -0.8459252 | 0.0016450 | 0.0634290 | 0.4550599 | 8 | NPTN/CANX/CHRNB1/GABBR1 |
| GOCC_U2_TYPE_SPLICEOSOMAL_COMPLEX | -1.522253 | -0.5225778 | 0.0013099 | 0.0566509 | 0.4550599 | 82 | CASC3/SNW1/CWC15/EIF4A3/TXNL4A/… |
| GOCC_PROTEASOME_COMPLEX | -1.587913 | -0.5778117 | 0.0012897 | 0.0566509 | 0.4550599 | 50 | PSMB5/PSMC3/PSMB8/PSMA5/PAAF1/… |
| GOCC_PEPTIDASE_COMPLEX | -1.592547 | -0.5487597 | 0.0002357 | 0.0132288 | 0.5188481 | 79 | SUPT20H/PSMB5/PSMC3/ENY2/PSMB8/… |
| GOCC_ENDOPEPTIDASE_COMPLEX | -1.642985 | -0.5814710 | 0.0001667 | 0.0132288 | 0.5188481 | 62 | PSMB5/PSMC3/PSMB8/PSMA5/CAPN2/… |
| GOCC_CATALYTIC_COMPLEX | -1.337776 | -0.4097882 | 0.0000087 | 0.0013807 | 0.5933255 | 788 | WWP2/CCNT1/CASC3/SNW1/FBXL15/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy; a negative NES indicates the pathway is enriched in Healthy relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_CELL_CYCLE | 2.289754 | 0.6357001 | 0.0000000 | 0.0000000 | 1.5092628 | 604 | TYMS/TK1/UBE2C/CDC20/BIRC5/… |
| REACTOME_CELL_CYCLE_MITOTIC | 2.380680 | 0.6684330 | 0.0000000 | 0.0000000 | 1.4954793 | 474 | TYMS/TK1/UBE2C/CDC20/BIRC5/… |
| REACTOME_CELL_CYCLE_CHECKPOINTS | 2.288546 | 0.6742240 | 0.0000000 | 0.0000000 | 1.0476265 | 238 | CDC20/CENPM/MCM4/CENPF/CLSPN/… |
| REACTOME_MITOTIC_G1_PHASE_AND_G1_S_TRANSITION | 2.409149 | 0.7570970 | 0.0000000 | 0.0000000 | 1.0175448 | 122 | TYMS/TK1/MCM4/TOP2A/RRM2/… |
| REACTOME_MITOTIC_METAPHASE_AND_ANAPHASE | 2.229852 | 0.6634384 | 0.0000000 | 0.0000000 | 0.9759947 | 215 | UBE2C/CDC20/BIRC5/CENPM/CENPF/… |
| REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION | 2.353102 | 0.7536475 | 0.0000000 | 0.0000000 | 0.9436322 | 109 | CDC20/BIRC5/CENPM/CENPF/CENPU/… |
| REACTOME_SEPARATION_OF_SISTER_CHROMATIDS | 2.242889 | 0.6828669 | 0.0000000 | 0.0000000 | 0.9325952 | 171 | UBE2C/CDC20/BIRC5/CENPM/CENPF/… |
| REACTOME_MITOTIC_PROMETAPHASE | 2.229588 | 0.6735329 | 0.0000000 | 0.0000000 | 0.9214260 | 184 | CDC20/BIRC5/CENPM/CENPF/CENPU/… |
| REACTOME_MITOTIC_SPINDLE_CHECKPOINT | 2.311261 | 0.7428671 | 0.0000000 | 0.0000000 | 0.8986712 | 104 | UBE2C/CDC20/BIRC5/CENPM/CENPF/… |
| REACTOME_RHO_GTPASES_ACTIVATE_FORMINS | 2.230452 | 0.7034458 | 0.0000000 | 0.0000000 | 0.8266573 | 119 | CDC20/BIRC5/CENPM/CENPF/CENPU/… |
| REACTOME_SYNTHESIS_OF_BILE_ACIDS_AND_BILE_SALTS | -1.734441 | -0.6925158 | 0.0064504 | 0.0685872 | 0.4070179 | 14 | AKR1C3/OSBPL9/NCOA1/SCP2 |
| REACTOME_ACYL_CHAIN_REMODELLING_OF_PE | -1.757069 | -0.7851604 | 0.0056879 | 0.0639073 | 0.4070179 | 10 | PLBD1/ABHD4/PLA2G12A/PLA2G6/LPCAT3 |
| REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S | -1.710116 | -0.5106442 | 0.0055110 | 0.0623656 | 0.4070179 | 52 | RPS4X/EIF3G/RPS23/EIF3D/RPS5/… |
| REACTOME_SLC_TRANSPORTER_DISORDERS | -1.714183 | -0.5072328 | 0.0047103 | 0.0544806 | 0.4070179 | 56 | SLC29A3/TPR/NUP85/POM121/SLC22A5/… |
| REACTOME_REGULATION_OF_GLUCOKINASE_BY_GLUCOKINASE_REGULATORY_PROTEIN | -1.834728 | -0.6171543 | 0.0030878 | 0.0416451 | 0.4317077 | 29 | TPR/NUP85/POM121/NUP50/NDC1/… |
| REACTOME_SUMOYLATION_OF_SUMOYLATION_PROTEINS | -1.875133 | -0.6100965 | 0.0016279 | 0.0256061 | 0.4550599 | 35 | TPR/NUP85/POM121/PIAS4/NUP50/… |
| REACTOME_TRNA_PROCESSING_IN_THE_NUCLEUS | -1.776648 | -0.5269802 | 0.0012447 | 0.0208296 | 0.4550599 | 58 | TPR/NUP85/POM121/TSEN54/ELAC2/… |
| REACTOME_SUMOYLATION_OF_UBIQUITINYLATION_PROTEINS | -1.844694 | -0.5917798 | 0.0008424 | 0.0155356 | 0.4772708 | 39 | TPR/NUP85/POM121/PIAS4/NUP50/… |
| REACTOME_VIRAL_MESSENGER_RNA_SYNTHESIS | -1.921465 | -0.5992917 | 0.0006187 | 0.0128047 | 0.4772708 | 42 | TPR/NUP85/POM121/POLR2C/POLR2D/… |
| REACTOME_HEME_SIGNALING | -1.974960 | -0.6382712 | 0.0001865 | 0.0048907 | 0.5188481 | 37 | HBA1/SIRT1/NCOA1/CHD9/MEF2D/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS | 2.065086 | 0.6687077 | 0.0000078 | 0.0015564 | 0.5933255 | 54 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_FCGR_ACTIVATION | 2.052936 | 0.6418801 | 0.0000079 | 0.0015564 | 0.5933255 | 61 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_FCGR3A_MEDIATED_IL10_SYNTHESIS | 1.992534 | 0.5979952 | 0.0000068 | 0.0015564 | 0.6105269 | 77 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 2.057428 | 0.6571009 | 0.0000097 | 0.0016881 | 0.5933255 | 57 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_COMPLEMENT_CASCADE | 2.001713 | 0.6180322 | 0.0000158 | 0.0024712 | 0.5756103 | 66 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION | 2.002822 | 0.6258283 | 0.0000207 | 0.0025407 | 0.5756103 | 63 | IGHV1-46/PDPK1/IGHV2-70/IGLV2-23/IGKV1D-16/… |
| REACTOME_FCERI_MEDIATED_MAPK_ACTIVATION | 1.979456 | 0.5991667 | 0.0000180 | 0.0025407 | 0.5756103 | 72 | JUN/IGHV1-46/FOS/IGHV2-70/IGLV2-23/… |
| REACTOME_FCERI_MEDIATED_CA_2_MOBILIZATION | 1.878472 | 0.5645345 | 0.0000844 | 0.0066104 | 0.5384341 | 75 | IGHV1-46/IGHV2-70/IGLV2-23/IGKV1D-16/IGKV1D-12/… |
| REACTOME_ROLE_OF_PHOSPHOLIPIDS_IN_PHAGOCYTOSIS | 1.856106 | 0.5610546 | 0.0001777 | 0.0132240 | 0.5188481 | 73 | IGHV1-46/IGHV2-70/IGLV2-23/IGHG1/IGKV1D-16/… |
| REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 1.884746 | 0.6155340 | 0.0003831 | 0.0240119 | 0.4984931 | 50 | IGHV1-46/IGHV2-70/IGKV1D-16/IGKV1D-12/IGHV4-34/… |
| REACTOME_MICRORNA_MIRNA_BIOGENESIS | -1.875600 | -0.6782889 | 0.0017554 | 0.0859580 | 0.4550599 | 23 | AGO2/POLR2K/POLR2D/POLR2G/XPO5/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -1.889937 | -0.5088080 | 0.0000572 | 0.0049789 | 0.5573322 | 100 | RPL41/RPS15A/RPS16/TRIB3/RPL37A/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | -1.924067 | -0.5220484 | 0.0000507 | 0.0047665 | 0.5573322 | 95 | RPL41/RPS15A/EIF3K/RPS16/RPL37A/… |
| REACTOME_PIWI_INTERACTING_RNA_PIRNA_BIOGENESIS | -2.039703 | -0.8056334 | 0.0000517 | 0.0047665 | 0.5573322 | 16 | POLR2K/HSP90AA1/PLD6/POLR2D/HENMT1/… |
| REACTOME_SELENOAMINO_ACID_METABOLISM | -1.930393 | -0.5204926 | 0.0000211 | 0.0025407 | 0.5756103 | 99 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| REACTOME_NONSENSE_MEDIATED_DECAY_NMD | -1.954446 | -0.5186864 | 0.0000046 | 0.0014550 | 0.6105269 | 111 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -2.064545 | -0.5685296 | 0.0000038 | 0.0014550 | 0.6272567 | 87 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| REACTOME_INFLUENZA_INFECTION | -1.931008 | -0.4911956 | 0.0000018 | 0.0010311 | 0.6435518 | 150 | KPNA4/RPL41/POLR2K/HSP90AA1/RPS15A/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -1.999921 | -0.5349662 | 0.0000020 | 0.0010311 | 0.6272567 | 108 | RPL41/SPCS2/RPS15A/SSR3/RPS16/… |
| REACTOME_TRANSLATION | -1.822634 | -0.4332126 | 0.0000004 | 0.0006434 | 0.6749629 | 279 | RPL41/MRPL45/PTCD3/TRMT112/MRPL38/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_INTERLEUKIN_4_AND_INTERLEUKIN_13_SIGNALING | 2.294714 | 0.7275841 | 0.0000000 | 0.0000016 | 0.7881868 | 61 | PIM1/SOCS3/OSM/FOS/SOCS1/… |
| REACTOME_NGF_STIMULATED_TRANSCRIPTION | 2.171093 | 0.8327271 | 0.0000005 | 0.0002587 | 0.6594444 | 23 | SGK1/ID3/FOS/JUNB/EGR1 |
| REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 1.644510 | 0.4147830 | 0.0000005 | 0.0002587 | 0.6594444 | 485 | PIM1/TNFRSF4/SOCS3/NFKBIA/IFNGR2/… |
| REACTOME_SIGNALING_BY_INTERLEUKINS | 1.752203 | 0.4552325 | 0.0000023 | 0.0009181 | 0.6272567 | 299 | PIM1/SOCS3/NFKBIA/SOCS2/IL16/… |
| REACTOME_TNFS_BIND_THEIR_PHYSIOLOGICAL_RECEPTORS | 1.971908 | 0.8704376 | 0.0000454 | 0.0142974 | 0.5573322 | 11 | TNFRSF4/TNFRSF18/TNFRSF1A/EDA |
| REACTOME_RUNX1_REGULATES_EXPRESSION_OF_COMPONENTS_OF_TIGHT_JUNCTIONS | 1.614547 | 0.9928643 | 0.0000952 | 0.0249627 | 0.5384341 | 3 | CLDN5/CBFB |
| REACTOME_NUCLEAR_EVENTS_KINASE_AND_TRANSCRIPTION_FACTOR_ACTIVATION | 1.907472 | 0.6571611 | 0.0002072 | 0.0465805 | 0.5188481 | 39 | SGK1/ID3/FOS/JUNB/EGR1 |
| REACTOME_SIGNALING_BY_BMP | 1.941446 | 0.8074343 | 0.0003128 | 0.0615510 | 0.4984931 | 14 | GREM2/UBE2D3/SMURF2/SMAD7/NOG/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_NEUTROPHIL_DEGRANULATION | 1.913036 | 0.5267514 | 0.0000000 | 0.0000026 | 0.7749390 | 390 | RETN/HP/RNASE2/S100A8/PLAC8/… |
| REACTOME_MHC_CLASS_II_ANTIGEN_PRESENTATION | 2.136595 | 0.6914075 | 0.0000031 | 0.0009637 | 0.6272567 | 85 | HLA-DRB1/HLA-DPB1/CTSD/HLA-DPA1/HLA-DRA/… |
| REACTOME_ANTIMICROBIAL_PEPTIDES | 2.027888 | 0.8628127 | 0.0000105 | 0.0027300 | 0.5933255 | 20 | CLU/S100A8/S100A9/RNASE3/CD4/… |
| REACTOME_TCR_SIGNALING | 1.946730 | 0.6130937 | 0.0000589 | 0.0131838 | 0.5573322 | 105 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/NFKBIA/… |
| REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 1.964273 | 0.7674385 | 0.0001058 | 0.0207243 | 0.5384341 | 29 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DQB1/… |
| REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION | 1.919823 | 0.6190395 | 0.0001929 | 0.0335851 | 0.5188481 | 89 | S100A8/FCGR1A/S100A9/PSMB6/PSME2/… |
| REACTOME_COSTIMULATION_BY_THE_CD28_FAMILY | 1.960511 | 0.6829988 | 0.0003529 | 0.0480993 | 0.4984931 | 54 | MAP3K8/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| REACTOME_INTERFERON_GAMMA_SIGNALING | 1.905652 | 0.6396230 | 0.0003174 | 0.0480993 | 0.4984931 | 68 | FCGR1A/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| REACTOME_PD_1_SIGNALING | 1.960298 | 0.8610723 | 0.0006475 | 0.0676439 | 0.4772708 | 17 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DQB1/… |
| REACTOME_RHO_GTPASES_ACTIVATE_NADPH_OXIDASES | 1.907998 | 0.8570680 | 0.0007417 | 0.0726438 | 0.4772708 | 14 | S100A8/S100A9/RAC2/CYBA |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | 2.927848 | 0.7921139 | 0.0000000 | 0.0000000 | 1.1512205 | 85 | RPS3A/EEF2/RPS15A/EEF1A1/RPL4/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | 2.464335 | 0.6602788 | 0.0000000 | 0.0000001 | 0.8266573 | 92 | RPS3A/RPS15A/RPL4/RPL10/RPL8/… |
| REACTOME_SELENOAMINO_ACID_METABOLISM | 2.417585 | 0.6477531 | 0.0000000 | 0.0000003 | 0.8012156 | 92 | RPS3A/RPS15A/PAPSS1/RPL4/RPL10/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | 2.304713 | 0.6175108 | 0.0000000 | 0.0000070 | 0.7337620 | 92 | RPS3A/EIF4B/EIF4A2/RPS15A/RPL4/… |
| REACTOME_IRAK4_DEFICIENCY_TLR2_4 | 2.435762 | 0.9374072 | 0.0000007 | 0.0001828 | 0.6594444 | 12 | S100A8/S100A9/CD14/BTK |
| REACTOME_DISEASES_OF_IMMUNE_SYSTEM | 2.385226 | 0.8406022 | 0.0000066 | 0.0009463 | 0.6105269 | 19 | S100A8/S100A9/CD14/NFKB1 |
| REACTOME_REGULATION_OF_TLR_BY_ENDOGENOUS_LIGAND | 2.308998 | 0.9370186 | 0.0000069 | 0.0009463 | 0.6105269 | 10 | S100A8/S100A9/CD14 |
| REACTOME_PD_1_SIGNALING | 2.269443 | 0.8676930 | 0.0000624 | 0.0052552 | 0.5384341 | 13 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| REACTOME_ANTIMICROBIAL_PEPTIDES | 2.196307 | 0.8397303 | 0.0001972 | 0.0119500 | 0.5188481 | 13 | S100A8/S100A9/CLU/CD4 |
| REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 2.162489 | 0.7621052 | 0.0004766 | 0.0169423 | 0.4984931 | 19 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| REACTOME_MATURATION_OF_SARS_COV_1_NUCLEOPROTEIN | -1.630307 | -0.8722066 | 0.0035578 | 0.0550004 | 0.2798657 | 6 | PARP4/SUMO1/PARP9 |
| REACTOME_PYRUVATE_METABOLISM | -1.750079 | -0.7666272 | 0.0005338 | 0.0179708 | 0.4772708 | 15 | RXRA/MPC2/PDHX/PDPR/PDK4/… |
| REACTOME_REGULATION_OF_PTEN_STABILITY_AND_ACTIVITY | -1.654283 | -0.5920429 | 0.0004374 | 0.0169423 | 0.4984931 | 55 | WWP2/PSMB5/PSMC3/PSMB8/PSMA5/… |
| REACTOME_VISUAL_PHOTOTRANSDUCTION | -1.736885 | -0.7713666 | 0.0003413 | 0.0149190 | 0.4984931 | 14 | CALM1/DHRS3/LRP1/METAP1/PLB1/… |
| REACTOME_REGULATION_OF_RUNX3_EXPRESSION_AND_ACTIVITY | -1.633873 | -0.5967284 | 0.0003154 | 0.0144806 | 0.4984931 | 47 | PSMB5/PSMC3/PSMB8/PSMA5/UBC/… |
| REACTOME_PYRUVATE_METABOLISM_AND_CITRIC_ACID_TCA_CYCLE | -1.716785 | -0.6733585 | 0.0003053 | 0.0144806 | 0.4984931 | 28 | RXRA/MPC2/ACO2/SDHD/PDHX/… |
| REACTOME_ORC1_REMOVAL_FROM_CHROMATIN | -1.637401 | -0.5904853 | 0.0002710 | 0.0136872 | 0.4984931 | 52 | PSMB5/PSMC3/PSMB8/PSMA5/UBC/… |
| REACTOME_REGULATION_OF_RUNX2_EXPRESSION_AND_ACTIVITY | -1.685489 | -0.6131684 | 0.0001194 | 0.0086168 | 0.5384341 | 48 | PSMC3/PSMB8/PSMA5/UBC/NR3C1/… |
| REACTOME_G2_M_CHECKPOINTS | -1.634917 | -0.5546823 | 0.0000368 | 0.0042912 | 0.5573322 | 89 | YWHAG/PSMB5/PSMC3/PSMB8/PSMA5/… |
| REACTOME_CYTOPROTECTION_BY_HMOX1 | -1.660849 | -0.5613173 | 0.0000320 | 0.0040450 | 0.5573322 | 94 | RXRA/PSMB5/PSMC3/NDUFA4/BACH1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy; a negative NES indicates the pathway is enriched in Healthy relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_CELL_CYCLE | 2.368696 | 0.7573590 | 0.0000000 | 0.0000000 | 0.9653278 | 107 | CDC20/PLK1/PTTG1/PCNA/CDC45/… |
| KEGG_P53_SIGNALING_PATHWAY | 2.213721 | 0.7785267 | 0.0000000 | 0.0000009 | 0.7477397 | 56 | RRM2/CCNB2/CHEK1/CDK1/PMAIP1/… |
| KEGG_OOCYTE_MEIOSIS | 2.073790 | 0.6980852 | 0.0000008 | 0.0000466 | 0.6594444 | 76 | CDC20/PLK1/PTTG1/PKMYT1/CCNB2/… |
| KEGG_DNA_REPLICATION | 2.033468 | 0.7982402 | 0.0000359 | 0.0013342 | 0.5573322 | 30 | MCM4/PCNA/MCM7/MCM2/FEN1/… |
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 1.913613 | 0.6293824 | 0.0000315 | 0.0013342 | 0.5573322 | 86 | H3C2/H2AZ1/H4C3/H2AX/H2AC16/… |
| KEGG_ONE_CARBON_POOL_BY_FOLATE | 1.901350 | 0.8649804 | 0.0001015 | 0.0027555 | 0.5384341 | 15 | TYMS/DHFR/SHMT2/MTHFD1/MTHFD2/… |
| KEGG_PYRIMIDINE_METABOLISM | 1.862932 | 0.6292376 | 0.0001037 | 0.0027555 | 0.5384341 | 74 | TYMS/TK1/RRM2/DUT/RRM1/… |
| KEGG_PROGESTERONE_MEDIATED_OOCYTE_MATURATION | 1.881709 | 0.6527769 | 0.0002091 | 0.0048611 | 0.5188481 | 59 | PLK1/PKMYT1/CCNB2/CDK1/MAD2L1/… |
| KEGG_PATHWAYS_IN_CANCER | 1.598302 | 0.4809982 | 0.0008310 | 0.0171738 | 0.4772708 | 182 | BIRC5/NFKBIA/RAD51/BAX/BRCA2/… |
| KEGG_PROTEASOME | 1.770950 | 0.6571985 | 0.0016089 | 0.0299255 | 0.4550599 | 41 | PSMD2/PSME2/PSMD3/PSMA4/POMP/… |
| KEGG_RIBOSOME | -1.582053 | -0.4392298 | 0.0044593 | 0.0754020 | 0.4070179 | 84 | RPS4X/RPL11/RPL10A/RPL6/RPS5/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_MAPK_SIGNALING_PATHWAY | 1.601598 | 0.4354561 | 0.0009307 | 0.0865506 | 0.4772708 | 146 | JUN/DUSP1/JUND/MYC/FOS/… |
| KEGG_RIBOSOME | -2.013517 | -0.5595300 | 0.0000050 | 0.0009305 | 0.6105269 | 84 | RPL41/RPS15A/RPS16/RPL37A/RPL29/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_JAK_STAT_SIGNALING_PATHWAY | 2.004276 | 0.6090932 | 0.0000042 | 0.0007840 | 0.6105269 | 76 | PIM1/SOCS3/IFNGR2/SOCS2/OSM/… |
| KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION | 1.902784 | 0.5630419 | 0.0000187 | 0.0017260 | 0.5756103 | 94 | TNFRSF4/IFNGR2/TNFRSF18/OSM/TNFRSF1A/… |
| KEGG_LEISHMANIA_INFECTION | 1.878891 | 0.6215761 | 0.0002953 | 0.0182118 | 0.4984931 | 45 | NFKBIA/IFNGR2/FOS/IL10/CYBA/… |
| KEGG_TYPE_II_DIABETES_MELLITUS | 1.921674 | 0.7509114 | 0.0005875 | 0.0271702 | 0.4772708 | 20 | SOCS3/SOCS2/SOCS1/PRKCZ/PIK3CD |
| KEGG_COLORECTAL_CANCER | 1.817259 | 0.5895323 | 0.0012838 | 0.0395842 | 0.4550599 | 50 | FOS/BAX/JUN/BIRC5/LEF1/… |
| KEGG_ADIPOCYTOKINE_SIGNALING_PATHWAY | 1.784819 | 0.5870618 | 0.0011168 | 0.0395842 | 0.4550599 | 47 | SOCS3/NFKBIA/TNFRSF1A/TRADD/ADIPOR1/… |
| KEGG_PARKINSONS_DISEASE | 1.597732 | 0.4682974 | 0.0025029 | 0.0661474 | 0.4317077 | 104 | MT-ND3/MT-ND1/MT-ATP6/MT-CYB/ATP5F1D/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_LEISHMANIA_INFECTION | 2.270116 | 0.7998974 | 0.0000006 | 0.0001141 | 0.6594444 | 45 | FCGR1A/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/… |
| KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION | 2.232035 | 0.8706325 | 0.0000056 | 0.0005193 | 0.6105269 | 25 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_ALLOGRAFT_REJECTION | 2.118426 | 0.8310802 | 0.0000346 | 0.0012349 | 0.5573322 | 24 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_ASTHMA | 2.115794 | 0.9220639 | 0.0000304 | 0.0012349 | 0.5756103 | 14 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_CELL_ADHESION_MOLECULES_CAMS | 2.106790 | 0.7173294 | 0.0000398 | 0.0012349 | 0.5573322 | 56 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION | 2.104295 | 0.7213498 | 0.0000510 | 0.0013560 | 0.5573322 | 53 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_GRAFT_VERSUS_HOST_DISEASE | 2.126954 | 0.8296444 | 0.0001305 | 0.0025492 | 0.5188481 | 25 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_VIRAL_MYOCARDITIS | 2.123395 | 0.7556473 | 0.0001374 | 0.0025492 | 0.5188481 | 42 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_TYPE_I_DIABETES_MELLITUS | 2.120448 | 0.8271065 | 0.0001508 | 0.0025492 | 0.5188481 | 25 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| KEGG_AUTOIMMUNE_THYROID_DISEASE | 2.107792 | 0.8550304 | 0.0002320 | 0.0035955 | 0.5188481 | 20 | HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DRA/HLA-DMB/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_RIBOSOME | 2.940224 | 0.7816192 | 0.0000000 | 0.0000000 | 1.1146645 | 84 | RPS3A/RPS15A/RPL4/RPL10/RPL8/… |
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 2.411273 | 0.7243773 | 0.0000005 | 0.0000478 | 0.6594444 | 41 | HLA-DPB1/HLA-DQA1/FCGR1A/C1QB/HLA-DRB1/… |
| KEGG_LEISHMANIA_INFECTION | 2.284199 | 0.6862026 | 0.0000045 | 0.0002737 | 0.6105269 | 41 | HLA-DPB1/HLA-DQA1/FCGR1A/IFNGR2/HLA-DRB1/… |
| KEGG_ASTHMA | 2.286285 | 0.8642071 | 0.0000114 | 0.0004187 | 0.5933255 | 15 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION | 2.276805 | 0.7954074 | 0.0000096 | 0.0004187 | 0.5933255 | 22 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_CELL_ADHESION_MOLECULES_CAMS | 2.155025 | 0.6473973 | 0.0000356 | 0.0010911 | 0.5573322 | 41 | HLA-DPB1/HLA-DQA1/VCAN/HLA-DRB1/HLA-DPA1/… |
| KEGG_AUTOIMMUNE_THYROID_DISEASE | 2.164303 | 0.7859898 | 0.0001654 | 0.0043481 | 0.5188481 | 18 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_ALLOGRAFT_REJECTION | 2.088005 | 0.7454467 | 0.0005937 | 0.0121386 | 0.4772708 | 21 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_TYPE_I_DIABETES_MELLITUS | 2.069722 | 0.7389193 | 0.0007541 | 0.0138750 | 0.4772708 | 21 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_VIRAL_MYOCARDITIS | 2.083402 | 0.6130263 | 0.0011348 | 0.0160614 | 0.4550599 | 37 | HLA-DPB1/HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DRA/… |
| KEGG_VIBRIO_CHOLERAE_INFECTION | -1.536724 | -0.6022936 | 0.0102672 | 0.0992129 | 0.3807304 | 28 | TCIRG1/ATP6V1G1/PDIA4/GNAS/ATP6V1C1/… |
| KEGG_CITRATE_CYCLE_TCA_CYCLE | -1.555799 | -0.6333197 | 0.0108629 | 0.0992129 | 0.3807304 | 22 | ACO2/SDHD/OGDH/IDH3A/SDHB/… |
| KEGG_ALZHEIMERS_DISEASE | -1.378605 | -0.4562842 | 0.0085468 | 0.0873675 | 0.3807304 | 111 | UQCRFS1/SDHD/NDUFA4/UQCR10/CALM1/… |
| KEGG_EPITHELIAL_CELL_SIGNALING_IN_HELICOBACTER_PYLORI_INFECTION | -1.550103 | -0.5914938 | 0.0045306 | 0.0490367 | 0.4070179 | 34 | TCIRG1/ATP6V1G1/ATP6V1C1/ATP6V0A1/MAPK14/… |
| KEGG_NEUROACTIVE_LIGAND_RECEPTOR_INTERACTION | -1.634120 | -0.6729056 | 0.0027635 | 0.0317803 | 0.4317077 | 20 | S1PR4/HRH2/NR3C1/CYSLTR1/CHRNB1/… |
| KEGG_SPLICEOSOME | -1.485363 | -0.4918235 | 0.0017076 | 0.0209465 | 0.4550599 | 110 | SNW1/CWC15/EIF4A3/TXNL4A/TRA2A/… |
| KEGG_PROTEASOME | -1.626411 | -0.6076102 | 0.0011233 | 0.0160614 | 0.4550599 | 38 | PSMB5/PSMB8/PSMA5/PSMA3/PSME1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways in total, we plot only the 5 pathways per cluster with the lowest adjusted P values.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy; a negative NES indicates the pathway is enriched in Healthy relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HSD17B8_TARGET_GENES | 2.505922 | 0.7051095 | 0.0000000 | 0.0000000 | 1.6595653 | 446 | TK1/MKI67/UBE2C/CDC20/BIRC5/… |
| PSMB5_TARGET_GENES | 2.085074 | 0.6145990 | 0.0000000 | 0.0000000 | 0.8513391 | 228 | STMN1/LGALS1/RRM2/TUBA1B/H2AZ1/… |
| E2F5_TARGET_GENES | 1.664771 | 0.4567061 | 0.0000000 | 0.0000000 | 0.8390889 | 871 | TK1/STMN1/MKI67/BIRC5/TOP2A/… |
| AEBP2_TARGET_GENES | 1.457557 | 0.4007301 | 0.0000198 | 0.0019530 | 0.5756103 | 802 | TYMS/TK1/UBE2C/CENPM/MCM4/… |
| BARX1_TARGET_GENES | 1.406124 | 0.3847913 | 0.0000227 | 0.0019530 | 0.5756103 | 973 | UBE2C/CDC20/AREG/TOP2A/CENPF/… |
| SETD7_TARGET_GENES | 1.471061 | 0.4063160 | 0.0000316 | 0.0023282 | 0.5573322 | 705 | TYMS/STMN1/EIF5A/RRM2/DUT/… |
| ASH1L_TARGET_GENES | 1.366928 | 0.3731608 | 0.0000421 | 0.0027179 | 0.5573322 | 1104 | TK1/CENPM/TOP2A/CENPF/TPX2/… |
| ZNF597_TARGET_GENES | 1.413176 | 0.3928013 | 0.0004363 | 0.0250162 | 0.4984931 | 589 | LGALS1/CENPU/MCM7/H4C3/NFKBIA/… |
| PHF21A_TARGET_GENES | 1.583297 | 0.4778271 | 0.0012691 | 0.0654868 | 0.4550599 | 178 | TK1/RRM2/UHRF1/CDCA7/PTGDS/… |
| CSHL1_TARGET_GENES | -1.861877 | -0.4777497 | 0.0000020 | 0.0002574 | 0.6272567 | 155 | DERA/CCDC107/HM13/APEX1/ZNF19/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GTF2E2_TARGET_GENES | 1.483473 | 0.3719489 | 0.0009853 | 0.0792437 | 0.4550599 | 285 | KLF6/DUSP1/PPP4C/SNHG5/PPP1R15A/… |
| GTF2A2_TARGET_GENES | 1.450753 | 0.3571803 | 0.0010750 | 0.0792437 | 0.4550599 | 349 | ARPC5L/KLF6/DUSP1/JUND/MYC/… |
| ELF2_TARGET_GENES | -1.226398 | -0.2632236 | 0.0009948 | 0.0792437 | 0.4550599 | 1200 | C12orf4/DDX5/ACO2/SOCS4/LSM8/… |
| ZNF250_TARGET_GENES | -1.391894 | -0.3235804 | 0.0009795 | 0.0792437 | 0.4550599 | 387 | NDUFS3/DNAJC11/LSM8/STX18/NSL1/… |
| BARX2_TARGET_GENES | -1.233101 | -0.2642737 | 0.0003832 | 0.0659146 | 0.4984931 | 1367 | NDUFS3/C12orf4/LIN7C/SLC25A35/RPL41/… |
| ZSCAN2_TARGET_GENES | -1.380313 | -0.3098029 | 0.0002463 | 0.0659146 | 0.4984931 | 573 | C12orf4/MRPL45/POLR2K/MBD5/DDX31/… |
| ZZZ3_TARGET_GENES | -1.727348 | -0.4849696 | 0.0003013 | 0.0659146 | 0.4984931 | 89 | RPL41/CSNK1D/ZNF655/PSMD1/RPL37A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| PSMB5_TARGET_GENES | 1.914131 | 0.5194175 | 0.0000000 | 0.0000161 | 0.7195128 | 220 | SGK1/SNHG5/NFKBIZ/ZFP36/LDHA/… |
| HMCES_TARGET_GENES | 1.890097 | 0.8917071 | 0.0000747 | 0.0192655 | 0.5384341 | 9 | MT-ND3/MT-CYB/MT-ND4/MT-ND4L/MT-ND6/… |
| SS18_SSX1_FUSION_UNIPROT_Q8IZH1_UNREVIEWED_TARGET_GENES | 1.727403 | 0.5490251 | 0.0014488 | 0.0838067 | 0.4550599 | 61 | PTGER2/ARID5B/LTBP3/RHOQ/KLF11/… |
| GTF2A2_TARGET_GENES | 1.465602 | 0.3793365 | 0.0006167 | 0.0838067 | 0.4772708 | 345 | SGK1/SNHG5/FOS/ZFP36/LDHA/… |
| ZNF282_TARGET_GENES | 1.342630 | 0.3323978 | 0.0011904 | 0.0838067 | 0.4550599 | 690 | PTGER2/SOCS3/IMPDH1/BAX/GUK1/… |
| ZNF560_TARGET_GENES | 1.775012 | 0.6601199 | 0.0020491 | 0.0881121 | 0.4317077 | 27 | HMGA1/LINC-PINT/OLFM2/CAPN2/KAT8/… |
| DROSHA_TARGET_GENES | 1.736157 | 0.6032446 | 0.0017089 | 0.0881121 | 0.4550599 | 37 | MT-ND3/MT-ND1/MT-ATP6/MT-ND2/EGR1/… |
| RARB_TARGET_GENES | 1.559662 | 0.9743756 | 0.0019330 | 0.0881121 | 0.4550599 | 3 | MT-ND3/MT-ND4/MT-ND4L |
| TFAM_TARGET_GENES | 1.696094 | 0.8910341 | 0.0024859 | 0.0986694 | 0.4317077 | 6 | MT-CYB/MT-ND2/MT-ND6/MT-ND5 |
| NKX2_5_TARGET_GENES | -1.249709 | -0.2684947 | 0.0006981 | 0.0838067 | 0.4772708 | 795 | HMGCR/DHX8/APPBP2-DT/NRL/STARD4/… |
| DLX6_TARGET_GENES | -1.293392 | -0.2890692 | 0.0013633 | 0.0838067 | 0.4550599 | 428 | HMGCR/LINC02453/ABHD11/DHFR/ALKBH1/… |
| CDC5L_TARGET_GENES | -1.529188 | -0.3793973 | 0.0009913 | 0.0838067 | 0.4550599 | 164 | HMGCR/NDUFS3/ERAP1/ZNF207/ANKMY1/… |
| ZNF707_TARGET_GENES | -1.829861 | -0.6452660 | 0.0014617 | 0.0838067 | 0.4550599 | 25 | BRD1/PRMT5-AS1/COMMD10/HSPA4/ZNF48/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| PSMB5_TARGET_GENES | 2.049872 | 0.5880093 | 0.0000000 | 0.0000108 | 0.7337620 | 222 | ID1/ADM/SNHG5/H1-2/LDHA/… |
| ZNF407_TARGET_GENES | -1.228450 | -0.2708254 | 0.0004416 | 0.0568529 | 0.4984931 | 1409 | MYO1C/FTO/DNAJC1/QSOX1/VPS33A/… |
| SUPT20H_TARGET_GENES | -1.342613 | -0.3036060 | 0.0000066 | 0.0011374 | 0.6105269 | 1033 | VPS33A/EIF4A1/TTC31/MIS18A/RPS29/… |
| DIDO1_TARGET_GENES | -1.400445 | -0.3117774 | 0.0000001 | 0.0000367 | 0.6901325 | 1247 | AK9/NME7/DNAJC1/TTC31/EMC2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| ZNF711_TARGET_GENES | -1.276627 | -0.3897236 | 3.44e-04 | 0.0885925 | 0.4984931 | 861 | WWP2/ADAP2/NOL11/DIS3L/MPC2/… |
| SUPT20H_TARGET_GENES | -1.359407 | -0.4163061 | 7.60e-06 | 0.0038997 | 0.6105269 | 667 | ADAP2/CCNT1/ACO2/ITGAE/PRMT1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Healthy, and vice-versa.
If a cluster is missing, it indicates that no pathways were significantly enriched for that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| E2F_Q4 | 2.235083 | 0.6796246 | 0.0000000 | 0.0000000 | 0.9545416 | 188 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F_Q3 | 2.181514 | 0.6730975 | 0.0000000 | 0.0000000 | 0.8986712 | 164 | STMN1/MCM4/PCNA/H2AZ1/CDC45/… |
| E2F1DP2_01 | 2.199273 | 0.6793092 | 0.0000000 | 0.0000000 | 0.8870750 | 156 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F_Q6_01 | 2.158422 | 0.6613600 | 0.0000000 | 0.0000000 | 0.8753251 | 176 | STMN1/EIF5A/MCM4/RRM2/PCNA/… |
| E2F4DP1_01 | 2.143873 | 0.6508513 | 0.0000000 | 0.0000000 | 0.8753251 | 190 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F_Q6 | 2.143655 | 0.6516007 | 0.0000000 | 0.0000000 | 0.8753251 | 184 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F1_Q6 | 2.118278 | 0.6430303 | 0.0000000 | 0.0000000 | 0.8513391 | 187 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F4DP2_01 | 2.113440 | 0.6430380 | 0.0000000 | 0.0000000 | 0.8513391 | 182 | STMN1/MCM4/CLSPN/RRM2/PCNA/… |
| E2F_03 | 2.098822 | 0.6403090 | 0.0000000 | 0.0000000 | 0.8266573 | 179 | STMN1/EIF5A/MCM4/LGALS1/H2AZ1/… |
| E2F1DP1RB_01 | 2.127433 | 0.6617685 | 0.0000000 | 0.0000000 | 0.8140358 | 145 | STMN1/CLSPN/RRM2/PCNA/H2AZ1/… |
| CCANNAGRKGGC_UNKNOWN | -2.004137 | -0.6146698 | 0.0001317 | 0.0036526 | 0.5188481 | 49 | RPS4X/SSH2/EXT2/YY1/RPRD1B/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| ATF3_Q6 | 1.818764 | 0.4968675 | 0.0000227 | 0.0138582 | 0.5756103 | 136 | JUN/DUSP1/JUND/AREG/PPP1R15A/… |
| MEF2_03 | 1.778060 | 0.5003649 | 0.0001783 | 0.0543947 | 0.5188481 | 110 | RHOB/JUN/S1PR1/STRADB/FOS/… |
| TATAAA_TATA_01 | 1.426909 | 0.3404044 | 0.0003982 | 0.0809728 | 0.4984931 | 497 | CD69/ZBTB10/EMP3/TSC22D3/MYC/… |
| MEF2_Q6_01 | 1.755602 | 0.5037722 | 0.0006396 | 0.0975350 | 0.4772708 | 98 | RHOB/JUN/STRADB/JCHAIN/CD180/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| RACCACAR_AML_Q6 | 1.753159 | 0.4949765 | 0.0000882 | 0.0538283 | 0.5384341 | 140 | EIF5A/CD69/ID3/RGS1/POLG2/… |
| AML_Q6 | 1.724139 | 0.4872107 | 0.0002042 | 0.0622800 | 0.5188481 | 136 | EIF5A/CD69/RGS1/POLG2/SRSF2/… |
| STAT5B_01 | 1.701975 | 0.4870626 | 0.0005406 | 0.0715985 | 0.4772708 | 122 | NFKBIA/PRDM1/SOCS2/CLDN5/OSM/… |
| CREL_01 | 1.680906 | 0.4729864 | 0.0004008 | 0.0715985 | 0.4984931 | 143 | EIF5A/CD69/NFKBIA/CLDN5/FLOT1/… |
| HEN1_02 | 1.658047 | 0.4810706 | 0.0010251 | 0.0715985 | 0.4550599 | 105 | LRRN3/IMPDH1/UBE2D3/LZTS2/F11R/… |
| NFKAPPAB65_01 | 1.657037 | 0.4736872 | 0.0008188 | 0.0715985 | 0.4772708 | 127 | EIF5A/CD69/NFKBIA/CLDN5/FLOT1/… |
| AP1_Q4_01 | 1.646911 | 0.4653875 | 0.0009130 | 0.0715985 | 0.4772708 | 136 | PRDM1/SRSF2/IL10/NR1D1/REXO2/… |
| CHOP_01 | 1.644826 | 0.4692138 | 0.0010564 | 0.0715985 | 0.4550599 | 128 | NFKBIA/PRDM1/SRSF2/H1-4/KLF12/… |
| WGTTNNNNNAAA_UNKNOWN | 1.494007 | 0.3912205 | 0.0008268 | 0.0715985 | 0.4772708 | 270 | CD69/PRDM1/SOCS2/FLOT1/FOS/… |
| HEN1_01 | 1.690153 | 0.5090659 | 0.0015199 | 0.0927120 | 0.4550599 | 85 | CLDN5/IMPDH1/UBE2D3/CDK6/LZTS2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| LMO2COM_02 | 1.839400 | 0.5834059 | 0.0000978 | 0.0596486 | 0.5384341 | 102 | CLU/ADM/ARHGEF10L/ID2/NAA38/… |
| PAX5_02 | 1.936611 | 0.8885264 | 0.0002801 | 0.0709771 | 0.4984931 | 12 | PIM1/RASGRP2/PFN1 |
| TGANTCA_AP1_C | 1.488211 | 0.4006211 | 0.0003491 | 0.0709771 | 0.4984931 | 555 | PIM1/ADM/FAM20A/LGALS1/FKBP5/… |
Here we check number of differentially expressed genes (DEGs) after applying some statistical thresholds:
Adjusted P value < 0.1
Absolute Log Fold Change > 0 (i.e. LFC > 0 for condition A, LFC < 0 for condition B)
Proportion of cells expressing gene in condition A > 0.1 if LFC is positive
Proportion of cells expressing gene in condition B > 0.1 if LFC is negative
Differential expression compares genes in condition A (left) versus condition B (right). The left column indicates the number of genes upregulated in the left condition, while the right column indicates genes upregulated in the right condition.
Finally, the thresholds above do not affect downstream results from pathway analysis; they are only used to count the number of DEGs. Downstream usage of DEGs can use these thresholds, or you can choose other appropriate cutoffs.
| Cluster | Covid_Mild_high | Healthy_high |
|---|---|---|
| cluster_1 | 4 | 2 |
| cluster_2 | 10 | 6 |
| cluster_3 | 12 | 12 |
| cluster_4 | 32 | 4 |
| cluster_5 | 23 | 2 |
| cluster_6 | 14 | 1 |
| cluster_7 | 0 | 0 |
To ensure the differential expression results are robust, it is helpful to inspect all DEGs by visualizing them in a heatmap. For each cluster, we show the DEGs at the single-cell level and at either the pseudobulk level (if pseudobulk_edgeR was used) or the average RISC value level (if wilcox was used).
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Pathway analysis for the cross-condition analysis is performed for overexpressed and underexpressed genes for each cluster. This is done via Gene Set Enrichment Analysis (GSEA) (Subramanian et al., 2005).
GSEA is preferred over other pathway analysis methods, such as Fisher's exact test or the chi-square test, because it does not require an arbitrary cutoff to define the set of DEGs and it takes into account how strongly differentially expressed each gene is. To capture the latter, a data-driven, gene-specific weight is applied. We use a standard weighting method of -log10(P-value) * sign of Log Fold Change.
The pathways used in pathway analysis are derived from the Molecular Signatures Database (MSigDB), where they are organized into categories such as Gene Ontology (GO) Biological Process, GO Molecular Function, KEGG, Reactome, etc. These are databases that annotate genes by function or molecular pathway.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy, and vice-versa.
If a cluster is missing, it indicates that no pathways were significantly enriched for that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 1.860675 | 0.5225121 | 0.0034503 | 0.0575053 | 0.4317077 | 89 | EIF2AK2/CMPK2/IFI44L/EPSTI1/OAS1/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.741909 | 0.4831213 | 0.0107553 | 0.0896279 | 0.3807304 | 104 | NFKBIA/EIF2AK2/IL7R/PTGER4/CYBB/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.670735 | 0.4333155 | 0.0095972 | 0.0896279 | 0.3807304 | 167 | NFKBIA/EIF2AK2/IRF8/CMPK2/IFI44L/… |
| HALLMARK_DNA_REPAIR | -1.470810 | -0.3853608 | 0.0069397 | 0.0867465 | 0.4070179 | 138 | GMPR2/ERCC8/VPS37B/REV3L/NUDT21/… |
| HALLMARK_MYC_TARGETS_V1 | -1.537450 | -0.3866300 | 0.0014790 | 0.0415748 | 0.4550599 | 189 | KPNB1/NCBP2/COX5A/PSMD14/SNRPA1/… |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | -1.537718 | -0.3865418 | 0.0016630 | 0.0415748 | 0.4550599 | 190 | COX11/COX7A2L/OGDH/ATP5MC3/COX5A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.648761 | 0.4191647 | 0.0019 | 0.0950022 | 0.4550599 | 162 | CD69/WARS1/IFI44L/SSPN/PFKP/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.836235 | 0.4509373 | 0.0000282 | 0.0014096 | 0.5756103 | 159 | CD69/ARID5B/EIF2AK2/PDE4B/TNFAIP3/… |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 1.779867 | 0.4827466 | 0.0005735 | 0.0095586 | 0.4772708 | 86 | EIF2AK2/IFITM3/OASL/LY6E/MX1/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.618972 | 0.4276528 | 0.0039414 | 0.0394139 | 0.4070179 | 97 | CD69/CCR7/EIF2AK2/PDE4B/PTGER2/… |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | -1.483782 | -0.3894638 | 0.0037731 | 0.0394139 | 0.4317077 | 188 | MRPL35/SDHB/UQCR11/MTRR/ATP5F1B/… |
| HALLMARK_MYC_TARGETS_V1 | -1.651533 | -0.4341729 | 0.0001322 | 0.0033056 | 0.5188481 | 186 | NCBP2/CCT2/RAD23B/CAD/CCT4/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 2.219077 | 0.5766389 | 0.0000000 | 0.0000002 | 0.7614608 | 171 | FCGR1A/IFI27/IFITM2/NFKBIA/MT2A/… |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 2.024761 | 0.5815479 | 0.0000192 | 0.0003208 | 0.5756103 | 89 | IFI27/IFITM2/IFITM3/IFITM1/OASL/… |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 1.949830 | 0.5144732 | 0.0000155 | 0.0003208 | 0.5756103 | 147 | MAP3K8/GADD45B/ID2/BCL2A1/NFKBIA/… |
| HALLMARK_COMPLEMENT | 1.842105 | 0.4912575 | 0.0000766 | 0.0009571 | 0.5384341 | 136 | C1QC/CLU/C1QA/PIM1/GZMA/… |
| HALLMARK_KRAS_SIGNALING_DN | 1.882308 | 0.6238281 | 0.0006570 | 0.0065702 | 0.4772708 | 40 | IFI44L/RSAD2/CAMK1D/SLC16A7/LFNG/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.781600 | 0.4835323 | 0.0008720 | 0.0072671 | 0.4772708 | 122 | NFKBIA/ADM/IFITM1/ABCA1/CD40/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.772822 | 0.5401254 | 0.0012207 | 0.0076297 | 0.4550599 | 63 | GADD45B/ID2/SGCB/CXCL8/BMP1/… |
| HALLMARK_MYOGENESIS | 1.682561 | 0.4971966 | 0.0025593 | 0.0127963 | 0.4317077 | 73 | CLU/GADD45B/TNNT1/BHLHE40/CDKN1A/… |
| HALLMARK_COAGULATION | 1.705623 | 0.5436959 | 0.0030711 | 0.0139595 | 0.4317077 | 51 | CLU/C1QA/SERPING1/PF4/PRSS23/… |
| HALLMARK_TGF_BETA_SIGNALING | 1.701278 | 0.5606248 | 0.0037771 | 0.0157378 | 0.4317077 | 41 | ID1/ID2/SMAD7/KLF10/FNTA |
| HALLMARK_OXIDATIVE_PHOSPHORYLATION | -1.438241 | -0.3508565 | 0.0054276 | 0.0208754 | 0.4070179 | 192 | AIFM1/DLST/CYC1/TIMM50/NDUFB7/… |
| HALLMARK_DNA_REPAIR | -1.598664 | -0.4080443 | 0.0018895 | 0.0104973 | 0.4550599 | 135 | AAAS/HPRT1/USP11/TSG101/ITPA/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 2.460772 | 0.6327149 | 0 | 0 | 0.8870750 | 150 | IFI27/IFI44L/IFIT1/ISG15/MX1/… |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 2.618870 | 0.7359381 | 0 | 0 | 0.8390889 | 81 | IFI27/IFI44L/IFITM1/ISG15/MX1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 2.640576 | 0.7428698 | 0e+00 | 0.0e+00 | 0.8513391 | 74 | IFI27/IFITM1/IFITM3/LY6E/MX1/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 2.261363 | 0.5835799 | 1e-07 | 1.6e-06 | 0.7049757 | 137 | IFI27/IFITM3/XAF1/LY6E/MX1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy, and vice-versa.
If a cluster is missing, it indicates that no pathways were significantly enriched for that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 2.369033 | 0.5602643 | 0.0000000 | 0.0000000 | 1.1512205 | 415 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOBP_IMMUNE_RESPONSE | 1.871305 | 0.4080207 | 0.0000000 | 0.0000000 | 1.0672100 | 1098 | TRAV38-2DV8/TRAV8-4/TRAV26-2/FASLG/NFKBIA/… |
| GOBP_BIOLOGICAL_PROCESS_INVOLVED_IN_INTERSPECIES_INTERACTION_BETWEEN_ORGANISMS | 1.566107 | 0.3463131 | 0.0000004 | 0.0006192 | 0.6749629 | 908 | TRAV26-2/FASLG/NFKBIA/KIR3DL1/TRAV4/… |
| GOBP_RESPONSE_TO_BACTERIUM | 1.865006 | 0.4481722 | 0.0000011 | 0.0014159 | 0.6435518 | 343 | TRAV26-2/FASLG/NFKBIA/IGHV3-74/IGHV3-73/… |
| GOBP_DEFENSE_RESPONSE | 1.481718 | 0.3278106 | 0.0000349 | 0.0290092 | 0.5573322 | 873 | FASLG/NFKBIA/KIR3DL1/IGHV3-74/EIF2AK2/… |
| GOBP_DEFENSE_RESPONSE_TO_OTHER_ORGANISM | 1.532996 | 0.3488651 | 0.0000419 | 0.0313079 | 0.5573322 | 634 | FASLG/KIR3DL1/IGHV3-74/EIF2AK2/IGHV3-73/… |
| GOBP_INNATE_IMMUNE_RESPONSE | 1.552066 | 0.3585867 | 0.0001527 | 0.0877615 | 0.5188481 | 516 | FASLG/KIR3DL1/IGHV3-74/EIF2AK2/IGHV3-73/… |
| GOBP_CELLULAR_AMIDE_METABOLIC_PROCESS | -1.403867 | -0.3110330 | 0.0000874 | 0.0543959 | 0.5384341 | 809 | RPL13/MARS2/BTG2/METTL3/NEMF/… |
| GOBP_CELLULAR_MACROMOLECULE_BIOSYNTHETIC_PROCESS | -1.345345 | -0.2955738 | 0.0000728 | 0.0494436 | 0.5384341 | 1030 | RPL13/BTG2/METTL3/NEMF/B4GALT4/… |
| GOBP_AMIDE_BIOSYNTHETIC_PROCESS | -1.536444 | -0.3463365 | 0.0000066 | 0.0061439 | 0.6105269 | 637 | RPL13/MARS2/BTG2/METTL3/NEMF/… |
| GOBP_PEPTIDE_METABOLIC_PROCESS | -1.555445 | -0.3506830 | 0.0000013 | 0.0014248 | 0.6435518 | 650 | RPL13/MARS2/BTG2/METTL3/NEMF/… |
| GOBP_PEPTIDE_BIOSYNTHETIC_PROCESS | -1.637163 | -0.3760544 | 0.0000004 | 0.0006192 | 0.6749629 | 523 | RPL13/MARS2/BTG2/METTL3/NEMF/… |
| GOBP_CYTOPLASMIC_TRANSLATION | -2.097907 | -0.5573015 | 0.0000000 | 0.0000570 | 0.7337620 | 134 | RPL13/METTL3/NEMF/YTHDF2/RPL37A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 2.428342 | 0.5655280 | 0.0000000 | 0.0000000 | 1.1330899 | 362 | IGHV5-10-1/IGKV1D-17/IGHV4-4/IGLV4-3/IGKV2-24/… |
| GOBP_IMMUNOGLOBULIN_PRODUCTION | 2.349868 | 0.6032476 | 0.0000000 | 0.0000016 | 0.8012156 | 161 | IGKV1D-17/IGLV4-3/IGKV2-24/IGLV5-45/IGLV5-37/… |
| GOBP_MEMBRANE_INVAGINATION | 2.476607 | 0.6786930 | 0.0000000 | 0.0000023 | 0.7749390 | 101 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_POSITIVE_REGULATION_OF_B_CELL_ACTIVATION | 2.417318 | 0.6643847 | 0.0000000 | 0.0000023 | 0.7749390 | 105 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHG2/IGHV1-69-2/… |
| GOBP_PHAGOCYTOSIS_RECOGNITION | 2.542065 | 0.7466718 | 0.0000000 | 0.0000027 | 0.7614608 | 65 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_COMPLEMENT_ACTIVATION | 2.527229 | 0.7339930 | 0.0000000 | 0.0000034 | 0.7614608 | 70 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_CELL_RECOGNITION | 2.380411 | 0.6502955 | 0.0000000 | 0.0000034 | 0.7614608 | 108 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE | 2.346764 | 0.6201149 | 0.0000000 | 0.0000034 | 0.7614608 | 133 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_B_CELL_RECEPTOR_SIGNALING_PATHWAY | 2.388981 | 0.6525518 | 0.0000000 | 0.0000037 | 0.7477397 | 103 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_CIRCULATING_IMMUNOGLOBULIN | 2.449157 | 0.7048141 | 0.0000000 | 0.0000097 | 0.7195128 | 74 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOBP_AMIDE_BIOSYNTHETIC_PROCESS | -1.376894 | -0.3216217 | 0.0004876 | 0.0697338 | 0.4984931 | 633 | NT5C3B/RPS14/CNOT8/BZW2/RBM4/… |
| GOBP_RIBOSOME_BIOGENESIS | -1.546860 | -0.3983614 | 0.0004471 | 0.0651945 | 0.4984931 | 211 | MRM1/RPS14/FTSJ3/POP5/RPS25/… |
| GOBP_PEPTIDE_BIOSYNTHETIC_PROCESS | -1.425108 | -0.3374772 | 0.0004322 | 0.0642825 | 0.4984931 | 525 | RPS14/CNOT8/BZW2/RBM4/PAIP2B/… |
| GOBP_NCRNA_PROCESSING | -1.528356 | -0.3751245 | 0.0004294 | 0.0642825 | 0.4984931 | 323 | MRM1/RPS14/FTSJ3/MRTO4/TRMT5/… |
| GOBP_NUCLEAR_TRANSCRIBED_MRNA_CATABOLIC_PROCESS | -1.734358 | -0.4821648 | 0.0003501 | 0.0553943 | 0.4984931 | 106 | NT5C3B/CNOT8/MRTO4/NBDY/CNOT7/… |
| GOBP_NCRNA_METABOLIC_PROCESS | -1.449554 | -0.3477894 | 0.0002790 | 0.0450951 | 0.4984931 | 435 | MRM1/RPS14/FTSJ3/MRTO4/TRMT5/… |
| GOBP_RRNA_METABOLIC_PROCESS | -1.627560 | -0.4115841 | 0.0001403 | 0.0237122 | 0.5188481 | 239 | MRM1/RPS14/FTSJ3/MRTO4/KRI1/… |
| GOBP_PEPTIDE_METABOLIC_PROCESS | -1.431015 | -0.3341687 | 0.0000556 | 0.0096124 | 0.5573322 | 650 | NT5C3B/RPS14/CNOT8/BZW2/RBM4/… |
| GOBP_RNA_PROCESSING | -1.464733 | -0.3379647 | 0.0000113 | 0.0020058 | 0.5933255 | 783 | MRM1/RPS14/FTSJ3/RBM4/MRTO4/… |
| GOBP_CYTOPLASMIC_TRANSLATION | -1.925943 | -0.5184987 | 0.0000023 | 0.0004238 | 0.6272567 | 136 | RPS14/RBM4/RPL8/RPL7A/RPS25/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 1.974708 | 0.4436898 | 0.0000000 | 0.0000001 | 0.8870750 | 416 | TRBV18/TRBV5-5/TRBV10-1/TRAV27/TRBV6-6/… |
| GOBP_PROTEIN_DNA_COMPLEX_ASSEMBLY | 2.110473 | 0.5362001 | 0.0000008 | 0.0019645 | 0.6594444 | 130 | H1-4/H1-2/H3C2/H3C7/H3C3/… |
| GOBP_NUCLEOSOME_ASSEMBLY | 2.168932 | 0.6008862 | 0.0000027 | 0.0049671 | 0.6272567 | 74 | H1-4/H1-2/H3C2/H3C7/H3C3/… |
| GOBP_DNA_PACKAGING | 1.901859 | 0.4674458 | 0.0000056 | 0.0083538 | 0.6105269 | 162 | H1-4/H1-2/H3C2/H3C10/AKAP8L/… |
| GOBP_DEFENSE_RESPONSE_TO_BACTERIUM | 1.889156 | 0.4692451 | 0.0000202 | 0.0187732 | 0.5756103 | 148 | TRAV27/GSDMA/IGHV3-74/TNFRSF1A/IGKV3-20/… |
| GOBP_CHROMOSOME_CONDENSATION | 2.216715 | 0.7133027 | 0.0000310 | 0.0242270 | 0.5573322 | 34 | H1-4/H1-2/AKAP8L/H1-3/CDK1/… |
| GOBP_COMPLEMENT_ACTIVATION | 2.112851 | 0.5985522 | 0.0000326 | 0.0242270 | 0.5573322 | 61 | CLU/IGHV3-74/IGHV5-10-1/IGHV2-26/IGHV1-18/… |
| GOBP_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.069228 | 0.5804109 | 0.0000477 | 0.0322515 | 0.5573322 | 67 | EIF2AK2/IFITM3/OASL/MX1/OAS2/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.131175 | 0.6471560 | 0.0000584 | 0.0354966 | 0.5573322 | 45 | EIF2AK2/IFITM3/OASL/MX1/OAS2/… |
| GOBP_PHAGOCYTOSIS_RECOGNITION | 2.026473 | 0.5935845 | 0.0001426 | 0.0623784 | 0.5188481 | 52 | IGHV3-74/IGHV5-10-1/IGHV2-26/IGHV1-18/IGHV3-73/… |
| GOBP_CYTOSOLIC_TRANSPORT | -1.736974 | -0.4689094 | 0.0001922 | 0.0752298 | 0.5188481 | 133 | USP7/TBC1D10C/DOP1A/ARFRP1/SNX5/… |
| GOBP_RIBOSOME_ASSEMBLY | -1.860932 | -0.5904210 | 0.0001876 | 0.0752298 | 0.5188481 | 50 | XRCC5/DDX28/SBDS/RPL10/MTERF3/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_COMPLEMENT_ACTIVATION | 2.431221 | 0.7407868 | 0.0000000 | 0.0000135 | 0.7477397 | 58 | C1QC/C1QB/CLU/C1QA/SERPING1/… |
| GOBP_NEGATIVE_REGULATION_OF_HEMOPOIESIS | 2.362874 | 0.7243888 | 0.0000001 | 0.0001007 | 0.7049757 | 56 | C1QC/ID2/SMAD7/RC3H2/BCL6/… |
| GOBP_CELL_JUNCTION_DISASSEMBLY | 2.152596 | 0.9558911 | 0.0000001 | 0.0001012 | 0.6901325 | 10 | C1QC/C1QB/C1QA |
| GOBP_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_CIRCULATING_IMMUNOGLOBULIN | 2.283553 | 0.6942718 | 0.0000008 | 0.0004842 | 0.6594444 | 59 | C1QC/C1QB/CLU/C1QA/FCGR2B/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE | 2.104768 | 0.5693051 | 0.0000020 | 0.0008855 | 0.6272567 | 124 | C1QC/C1QB/CLU/C1QA/RNASE2/… |
| GOBP_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.257905 | 0.6685044 | 0.0000027 | 0.0010720 | 0.6272567 | 67 | IFITM2/IFITM3/IFITM1/OASL/RSAD2/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.195126 | 0.7070095 | 0.0000117 | 0.0038146 | 0.5933255 | 44 | IFITM2/IFITM3/IFITM1/OASL/APOBEC3A/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_PROCESS | 2.093790 | 0.6222968 | 0.0000400 | 0.0093507 | 0.5573322 | 66 | IFITM2/IFITM3/IFITM1/OASL/RSAD2/… |
| GOBP_RESPONSE_TO_TYPE_I_INTERFERON | 2.113700 | 0.6548269 | 0.0001241 | 0.0238255 | 0.5188481 | 53 | IFI27/IFITM2/IFITM3/IFITM1/ISG15/… |
| GOBP_RESPONSE_TO_INTERFERON_BETA | 2.109431 | 0.7679136 | 0.0006536 | 0.0873725 | 0.4772708 | 24 | IFITM2/IFITM3/IFITM1/AIM2/OAS1/… |
| GOBP_MRNA_CIS_SPLICING_VIA_SPLICEOSOME | -1.905173 | -0.7051960 | 0.0007271 | 0.0938497 | 0.4772708 | 21 | WBP11/WBP4/RBM17/CWC15/PRPF40A/… |
| GOBP_CELL_CYCLE_PROCESS | -1.318557 | -0.2863453 | 0.0005959 | 0.0841725 | 0.4772708 | 731 | CCNY/AAAS/PPP2R2D/RAD51B/SDCCAG8/… |
| GOBP_MRNA_METABOLIC_PROCESS | -1.354830 | -0.2997693 | 0.0003729 | 0.0547292 | 0.4984931 | 566 | WBP11/MOV10/DCP1B/TBRG4/RNPC3/… |
| GOBP_MRNA_PROCESSING | -1.428778 | -0.3253080 | 0.0002580 | 0.0410893 | 0.4984931 | 402 | WBP11/WBP4/TBRG4/RNPC3/NBDY/… |
| GOBP_VERY_LONG_CHAIN_FATTY_ACID_METABOLIC_PROCESS | -1.962502 | -0.8056394 | 0.0001569 | 0.0279641 | 0.5188481 | 14 | ABCD4/HACD2/ABCD3/TECR/HSD17B4 |
| GOBP_NUCLEOSIDE_BISPHOSPHATE_METABOLIC_PROCESS | -1.822378 | -0.5053500 | 0.0001314 | 0.0245887 | 0.5188481 | 79 | DLST/HACD2/PDHA1/PAPSS1/TECR/… |
| GOBP_CELL_DIVISION | -1.527260 | -0.3476624 | 0.0000303 | 0.0075616 | 0.5756103 | 378 | CCNY/PPP2R2D/AHCTF1/NSMCE2/HAUS1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_DEFENSE_RESPONSE_TO_SYMBIONT | 2.141260 | 0.5455479 | 0.0000000 | 0.0000935 | 0.7477397 | 176 | IFI27/APOBEC3A/IFIT1/IFITM1/IFI6/… |
| GOBP_RESPONSE_TO_VIRUS | 1.930244 | 0.4736786 | 0.0000002 | 0.0007369 | 0.6901325 | 232 | IFI27/IFI44L/APOBEC3A/IFIT1/IFITM1/… |
| GOBP_INNATE_IMMUNE_RESPONSE | 1.697166 | 0.3933076 | 0.0000003 | 0.0007756 | 0.6749629 | 391 | IFI27/C1QB/APOBEC3A/IFIT1/IFITM1/… |
| GOBP_RESPONSE_TO_TYPE_I_INTERFERON | 2.519005 | 0.7725815 | 0.0000008 | 0.0014283 | 0.6594444 | 45 | IFI27/IFIT1/IFITM1/ISG15/MX1/… |
| GOBP_DEFENSE_RESPONSE_TO_OTHER_ORGANISM | 1.548131 | 0.3589170 | 0.0000015 | 0.0022037 | 0.6435518 | 489 | IFI27/C1QB/IFI44L/APOBEC3A/IFIT1/… |
| GOBP_VIRAL_GENOME_REPLICATION | 2.268702 | 0.6151272 | 0.0000043 | 0.0052296 | 0.6105269 | 90 | IFI27/APOBEC3A/IFIT1/IFITM1/ISG15/… |
| GOBP_VIRAL_LIFE_CYCLE | 1.882133 | 0.4747575 | 0.0000459 | 0.0371390 | 0.5573322 | 179 | IFI27/APOBEC3A/IFIT1/IFITM1/ISG15/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_PROCESS | 2.200760 | 0.6426592 | 0.0000885 | 0.0586393 | 0.5384341 | 61 | APOBEC3A/IFIT1/IFITM1/ISG15/MX1/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.161283 | 0.6687871 | 0.0001194 | 0.0724794 | 0.5384341 | 42 | APOBEC3A/IFIT1/IFITM1/ISG15/MX1/… |
| GOBP_SYNAPSE_PRUNING | 1.983686 | 0.9596783 | 0.0001393 | 0.0780763 | 0.5188481 | 6 | C1QB/C3 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_RESPONSE_TO_TYPE_I_INTERFERON | 2.538164 | 0.7676580 | 0.0000001 | 0.0000930 | 0.7049757 | 46 | IFI27/IFITM1/IFITM3/MX1/IFITM2/… |
| GOBP_DEFENSE_RESPONSE_TO_SYMBIONT | 2.163648 | 0.5400973 | 0.0000009 | 0.0007951 | 0.6594444 | 163 | IFI27/IFI6/IFITM1/IFITM3/DDIT4/… |
| GOBP_RESPONSE_TO_INTERFERON_BETA | 2.442482 | 0.8649229 | 0.0000011 | 0.0008919 | 0.6435518 | 21 | IFITM1/IFITM3/XAF1/IFITM2/CAPN2 |
| GOBP_VIRAL_GENOME_REPLICATION | 2.330376 | 0.6357810 | 0.0000012 | 0.0008919 | 0.6435518 | 87 | IFI27/IFITM1/IFITM3/MX1/ISG20/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_ENTRY_INTO_HOST_CELL | 2.160108 | 0.9026435 | 0.0000080 | 0.0052642 | 0.5933255 | 11 | IFITM1/IFITM3/LY6E/IFITM2/FCN1/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_GENOME_REPLICATION | 2.319381 | 0.7187952 | 0.0000817 | 0.0455892 | 0.5384341 | 38 | IFITM1/IFITM3/MX1/ISG20/IFITM2/… |
| GOBP_MODULATION_BY_HOST_OF_VIRAL_GENOME_REPLICATION | 2.141035 | 0.8545904 | 0.0000958 | 0.0496338 | 0.5384341 | 13 | IFI27/STOM |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_LIFE_CYCLE | 2.259694 | 0.8916490 | 0.0001154 | 0.0558291 | 0.5384341 | 14 | IFITM1/IFITM3/LY6E/IFITM2/FCN1/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_PROCESS | 2.341837 | 0.6833816 | 0.0001648 | 0.0612277 | 0.5188481 | 55 | IFITM1/IFITM3/LY6E/MX1/ISG20/… |
| GOBP_REGULATION_OF_BIOLOGICAL_PROCESS_INVOLVED_IN_SYMBIOTIC_INTERACTION | 2.324339 | 0.7864799 | 0.0003155 | 0.0995452 | 0.4984931 | 26 | IFITM1/IFITM3/LY6E/IFITM2/FCN1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy, and vice-versa.
If a cluster is missing, it indicates that no pathways were significantly enriched for that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_ANTIGEN_BINDING | 2.117260 | 0.5683886 | 8.39e-05 | 0.0351880 | 0.5384341 | 123 | TRAV8-4/TRAV12-2/IGHV3-74/IGHV3-73/IL7R/… |
| GOMF_SIGNALING_RECEPTOR_BINDING | 1.541810 | 0.3528771 | 7.53e-05 | 0.0351880 | 0.5384341 | 601 | S100B/TRAV8-4/FASLG/IGHV3-74/IGHV3-73/… |
| GOMF_LIGASE_ACTIVITY | -1.808788 | -0.4785819 | 6.30e-05 | 0.0351880 | 0.5384341 | 132 | PCCA/MARS2/NADSYN1/SAE1/AARS1/… |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -1.892482 | -0.4919282 | 4.50e-06 | 0.0076226 | 0.6105269 | 148 | RPL13/MRPL41/RPL37A/RPL10A/RPL4/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_ANTIGEN_BINDING | 2.426503 | 0.6422670 | 0.00e+00 | 0.0000011 | 0.8012156 | 126 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOMF_IMMUNOGLOBULIN_RECEPTOR_BINDING | 2.523315 | 0.7445718 | 1.00e-07 | 0.0000583 | 0.7049757 | 61 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHV1-69-2/… |
| GOMF_SIGNALING_RECEPTOR_BINDING | 1.489865 | 0.3340615 | 2.67e-05 | 0.0088396 | 0.5756103 | 570 | IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/IGHV1-69-2/… |
| GOMF_RNA_BINDING | -1.362048 | -0.3039544 | 2.28e-05 | 0.0088396 | 0.5756103 | 1389 | MRM1/N4BP1/RPS14/CNOT8/FTSJ3/… |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -1.902452 | -0.5004558 | 2.60e-06 | 0.0014351 | 0.6272567 | 149 | RPS14/RPL8/RPL7A/RPS25/MRPS35/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_ANTIGEN_BINDING | 1.967821 | 0.5092179 | 9.5e-06 | 0.015686 | 0.5933255 | 119 | IGLV3-21/IGKV2-30/IGHV3-74/IGKV3-20/HLA-DRB5/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_LAMIN_BINDING | 2.070413 | 0.9126548 | 6.71e-05 | 0.057805 | 0.5384341 | 10 | IFI27/SUN1 |
| GOMF_ANTIGEN_BINDING | 2.062157 | 0.6127616 | 7.26e-05 | 0.057805 | 0.5384341 | 55 | IGLV1-40/IGHV1-69D/CD1C/IGLV3-21/IGKV3-20/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy; a negative NES indicates it is enriched in Healthy relative to Covid_Mild.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_T_CELL_RECEPTOR_COMPLEX | 2.810769 | 0.8135368 | 0.0000000 | 0.0000000 | 1.0175448 | 79 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOCC_PLASMA_MEMBRANE_SIGNALING_RECEPTOR_COMPLEX | 2.480113 | 0.6597571 | 0.0000000 | 0.0000000 | 0.9101197 | 144 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOCC_RECEPTOR_COMPLEX | 2.372804 | 0.5944980 | 0.0000000 | 0.0000000 | 0.8986712 | 214 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOCC_PLASMA_MEMBRANE_PROTEIN_COMPLEX | 2.187653 | 0.5304449 | 0.0000000 | 0.0000000 | 0.8266573 | 280 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOCC_SOMATODENDRITIC_COMPARTMENT | 1.741551 | 0.4175717 | 0.0001192 | 0.0147172 | 0.5384341 | 315 | S100B/CYBB/SLA/STAT1/NCDN/… |
| GOCC_MEMBRANE_PROTEIN_COMPLEX | 1.436362 | 0.3236719 | 0.0002872 | 0.0236458 | 0.4984931 | 739 | TRAV38-2DV8/TRAV8-4/TRAV26-2/TRAV12-2/TRBV20-1/… |
| GOCC_CELL_BODY | 1.764825 | 0.4429838 | 0.0007927 | 0.0602450 | 0.4772708 | 212 | S100B/CYBB/NCDN/TPX2/TOP1/… |
| GOCC_MHC_CLASS_II_PROTEIN_COMPLEX | 1.834732 | 0.7393508 | 0.0013192 | 0.0930986 | 0.4550599 | 15 | HLA-DQB2/HLA-DRB5/HLA-DOB/HLA-DRB1/HLA-DPA1/… |
| GOCC_CYTOSOLIC_RIBOSOME | -1.783070 | -0.4942858 | 0.0002826 | 0.0236458 | 0.4984931 | 91 | RPL37A/RPS27/RPL10A/RPL4/RPL27A/… |
| GOCC_INTRACELLULAR_PROTEIN_CONTAINING_COMPLEX | -1.425841 | -0.3218254 | 0.0001487 | 0.0157025 | 0.5188481 | 595 | PPP2R1B/PSMB10/CSNK2B/PPP4R2/FBXO8/… |
| GOCC_RIBOSOMAL_SUBUNIT | -1.632796 | -0.4211812 | 0.0001589 | 0.0157025 | 0.5188481 | 161 | RPL13/MRPL41/RPL37A/RPS27/RPL10A/… |
| GOCC_CATALYTIC_COMPLEX | -1.335781 | -0.2921304 | 0.0000979 | 0.0138217 | 0.5384341 | 1165 | GMPR2/PPP2R1B/RCOR3/POP7/PSMB10/… |
| GOCC_LARGE_RIBOSOMAL_SUBUNIT | -1.880783 | -0.5119930 | 0.0000398 | 0.0065560 | 0.5573322 | 103 | RPL13/MRPL41/RPL10A/RPL4/MRPL55/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -2.227967 | -0.6842072 | 0.0000005 | 0.0000913 | 0.6749629 | 50 | RPL13/RPL37A/RPL10A/RPL4/RPL27A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_IMMUNOGLOBULIN_COMPLEX | 2.877338 | 0.7668488 | 0.0000000 | 0.0000000 | 1.1512205 | 124 | IGHV5-10-1/IGKV1D-17/IGHV4-4/IGLV4-3/IGKV2-24/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX_CIRCULATING | 2.501185 | 0.7606068 | 0.0000000 | 0.0000002 | 0.8140358 | 60 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 2.120924 | 0.5267277 | 0.0000000 | 0.0000056 | 0.7337620 | 209 | IGHV5-10-1/IGHV4-4/CD69/IGHV3-20/IGHV1-24/… |
| GOCC_CELL_SURFACE | 1.841232 | 0.4294251 | 0.0000002 | 0.0000448 | 0.6901325 | 355 | IGHV5-10-1/IGHV4-4/CD69/IGHV3-20/IGHV1-24/… |
| GOCC_SIDE_OF_MEMBRANE | 1.819743 | 0.4293807 | 0.0000008 | 0.0001334 | 0.6594444 | 333 | IGHV5-10-1/IGHV4-4/IGHV3-20/IGHV1-24/IGHG2/… |
| GOCC_CYTOCHROME_COMPLEX | -1.875807 | -0.6551574 | 0.0003830 | 0.0189385 | 0.4984931 | 29 | COX6C/COX7B/NDUFA4/UQCR11/COX7C/… |
| GOCC_RESPIRATORY_CHAIN_COMPLEX_IV | -1.910721 | -0.7750233 | 0.0002386 | 0.0124183 | 0.5188481 | 15 | COX6C/COX7B/NDUFA4/COX7C/COX5A/… |
| GOCC_CYTOSOLIC_SMALL_RIBOSOMAL_SUBUNIT | -1.883362 | -0.6242331 | 0.0001859 | 0.0102117 | 0.5188481 | 39 | RPS14/RPS25/RPS17/RPS28/RPS27A/… |
| GOCC_POLYSOMAL_RIBOSOME | -1.985078 | -0.6933221 | 0.0000647 | 0.0045713 | 0.5384341 | 29 | RPL8/RPL7A/RPL18A/RPS28/RPL39/… |
| GOCC_LARGE_RIBOSOMAL_SUBUNIT | -1.843497 | -0.5156260 | 0.0000243 | 0.0019993 | 0.5756103 | 103 | RPL8/RPL7A/MRPL52/RPL35A/MRPL44/… |
| GOCC_RESPIRASOME | -1.921574 | -0.5485906 | 0.0000181 | 0.0017926 | 0.5756103 | 89 | COX6C/COX7B/NDUFA4/UQCR11/HIGD2A/… |
| GOCC_POLYSOME | -2.060628 | -0.6289142 | 0.0000071 | 0.0007761 | 0.6105269 | 58 | MCRS1/RPL8/RPL7A/AGO1/RPL18A/… |
| GOCC_RIBOSOMAL_SUBUNIT | -1.837359 | -0.4836669 | 0.0000060 | 0.0007453 | 0.6105269 | 160 | RPS14/RPL8/RPL7A/RPS25/MRPS28/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -2.091839 | -0.6563991 | 0.0000018 | 0.0002555 | 0.6435518 | 50 | RPL8/RPL7A/RPL35A/RPL18A/RPL14/… |
| GOCC_CYTOSOLIC_RIBOSOME | -2.145407 | -0.6109694 | 0.0000000 | 0.0000056 | 0.7337620 | 91 | RPS14/RPL8/RPL7A/RPS25/RPL35A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_IMMUNOGLOBULIN_COMPLEX | 2.160772 | 0.5807815 | 0.0000011 | 0.0009178 | 0.6435518 | 92 | IGLV3-21/IGKV2-30/IGHV3-74/IGKV3-20/IGLV9-49/… |
| GOCC_T_CELL_RECEPTOR_COMPLEX | 2.082930 | 0.5608558 | 0.0000019 | 0.0009178 | 0.6435518 | 95 | TRBV18/TRBV5-5/TRBV10-1/TRBV6-6/TRAV14DV4/… |
| GOCC_DNA_PACKAGING_COMPLEX | 2.092896 | 0.5757801 | 0.0000074 | 0.0024103 | 0.6105269 | 80 | H1-4/H1-2/H3C2/H3C10/H3C7/… |
| GOCC_PROTEIN_DNA_COMPLEX | 1.858029 | 0.4773662 | 0.0000101 | 0.0024698 | 0.5933255 | 135 | H1-4/H1-2/H3C2/H3C10/H3C7/… |
| GOCC_PLASMA_MEMBRANE_PROTEIN_COMPLEX | 1.647734 | 0.3837028 | 0.0000323 | 0.0052788 | 0.5573322 | 283 | TRBV18/TRBV5-5/TRBV10-1/TRAV27/TRBV6-6/… |
| GOCC_SPHERICAL_HIGH_DENSITY_LIPOPROTEIN_PARTICLE | 1.624244 | 0.9913244 | 0.0000275 | 0.0052788 | 0.5756103 | 3 | CLU/APOM |
| GOCC_PLASMA_MEMBRANE_SIGNALING_RECEPTOR_COMPLEX | 1.820071 | 0.4537437 | 0.0000505 | 0.0070777 | 0.5573322 | 154 | TRBV18/TRBV5-5/TRBV10-1/TRAV27/TRBV6-6/… |
| GOCC_RECEPTOR_COMPLEX | 1.587443 | 0.3831787 | 0.0001581 | 0.0193832 | 0.5188481 | 213 | TRBV18/TRBV5-5/TRBV10-1/TRAV27/TRBV6-6/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX_CIRCULATING | 1.939151 | 0.5819781 | 0.0002973 | 0.0291684 | 0.4984931 | 48 | IGHV3-74/IGKV3-20/IGHV5-10-1/IGHV2-26/IGHV1-18/… |
| GOCC_CALCIUM_CHANNEL_COMPLEX | 2.013736 | 0.7540111 | 0.0005838 | 0.0520672 | 0.4772708 | 17 | PDE4D/PDE4B/MCUB/MICU1/PTPA/… |
| GOCC_AUTOPHAGOSOME_MEMBRANE | -1.785206 | -0.5962987 | 0.0009338 | 0.0763369 | 0.4772708 | 39 | ATP6AP2/ATG9A/RAB7A/TECPR1/ATG14/… |
| GOCC_MITOCHONDRIAL_PROTEIN_CONTAINING_COMPLEX | -1.574719 | -0.3988374 | 0.0002286 | 0.0249173 | 0.5188481 | 250 | MRPL35/SDHB/IMMP1L/UQCR11/MRPL38/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_BLOOD_MICROPARTICLE | 2.293026 | 0.7360336 | 0.0000003 | 0.0001473 | 0.6749629 | 52 | C1QC/C1QB/CLU/IGLV3-21/HSPA1B/… |
| GOCC_EXTERNAL_ENCAPSULATING_STRUCTURE | 2.122080 | 0.5969957 | 0.0000003 | 0.0001473 | 0.6749629 | 109 | C1QC/C1QB/CLU/C1QA/SERPING1/… |
| GOCC_COLLAGEN_TRIMER | 2.059335 | 0.9498680 | 0.0000009 | 0.0002230 | 0.6594444 | 10 | C1QC/C1QB/MARCO/C1QA/COL8A2 |
| GOCC_COLLAGEN_CONTAINING_EXTRACELLULAR_MATRIX | 2.113216 | 0.6098160 | 0.0000029 | 0.0005685 | 0.6272567 | 89 | C1QC/C1QB/C1QA/SERPING1/LGALS3BP/… |
| GOCC_HIGH_DENSITY_LIPOPROTEIN_PARTICLE | 1.911153 | 0.9667868 | 0.0000337 | 0.0055450 | 0.5573322 | 7 | CLU/LCAT/PLA2G7 |
| GOCC_CELL_SURFACE | 1.570645 | 0.3805374 | 0.0000698 | 0.0098402 | 0.5384341 | 362 | CLU/ABCA1/CD40/FCGR2B/FOLR3/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX | 1.775126 | 0.5226512 | 0.0004965 | 0.0535858 | 0.4772708 | 83 | IGLV3-21/IGLV3-1/TRDC/IGLV3-19/IGKV2D-30/… |
| GOCC_DNA_POLYMERASE_COMPLEX | -1.827871 | -0.7049446 | 0.0010862 | 0.0765785 | 0.4550599 | 18 | MAD2L2/POLD2/POLG/POLD3/PRIM2/… |
| GOCC_INTRACELLULAR_PROTEIN_CONTAINING_COMPLEX | -1.331250 | -0.2914627 | 0.0006548 | 0.0538577 | 0.4772708 | 576 | PPP2R2D/SYVN1/PEF1/FBXW5/DERL2/… |
| GOCC_MITOCHONDRIAL_PROTEIN_CONTAINING_COMPLEX | -1.474939 | -0.3492730 | 0.0005972 | 0.0535858 | 0.4772708 | 251 | MFN1/TIMM22/CYC1/MRPL55/TIMM50/… |
| GOCC_THO_COMPLEX | -1.762032 | -0.8997731 | 0.0005736 | 0.0535858 | 0.4772708 | 6 | THOC1/THOC2/THOC6 |
| GOCC_TRANSFERASE_COMPLEX | -1.350484 | -0.2945855 | 0.0003318 | 0.0409368 | 0.4984931 | 609 | CCNY/HCFC2/PIGM/SYVN1/DLST/… |
| GOCC_CATALYTIC_COMPLEX | -1.407214 | -0.2954801 | 0.0000008 | 0.0002230 | 0.6594444 | 1120 | CCNY/HCFC2/PPP2R2D/PIGM/DLST/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_UBIQUITIN_CONJUGATING_ENZYME_COMPLEX | -1.776950 | -0.8913531 | 0.0001535 | 0.0738115 | 0.5188481 | 7 | UBE2B/RNF20/UBE2N/UBE2A |
| GOCC_MITOCHONDRIAL_PROTEIN_CONTAINING_COMPLEX | -1.749688 | -0.4445528 | 0.0000022 | 0.0020777 | 0.6272567 | 227 | SDHB/CHCHD3/MRPS9/NDUFS4/NDUFB9/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy; a negative NES indicates it is enriched in Healthy relative to Covid_Mild.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_NUCLEAR_SIGNALING_BY_ERBB4 | 2.237662 | 0.9034019 | 0.0000124 | 0.0016262 | 0.5933255 | 15 | S100B/TAB2/ADAM17 |
| REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 1.515736 | 0.3530944 | 0.0003204 | 0.0297386 | 0.4984931 | 501 | S100B/FASLG/NFKBIA/EIF2AK2/IFNGR1/… |
| REACTOME_SIGNALING_BY_ERBB4 | 2.250647 | 0.7763313 | 0.0005032 | 0.0417958 | 0.4772708 | 29 | S100B/TAB2 |
| REACTOME_DDX58_IFIH1_MEDIATED_INDUCTION_OF_INTERFERON_ALPHA_BETA | 2.189480 | 0.6625634 | 0.0007737 | 0.0530800 | 0.4772708 | 59 | S100B/NFKBIA/TNFAIP3/HERC5/IRF7/… |
| REACTOME_FASL_CD95L_SIGNALING | 1.760740 | 0.9416871 | 0.0009368 | 0.0615960 | 0.4772708 | 5 | FASLG/CASP10 |
| REACTOME_TAK1_ACTIVATES_NFKB_BY_PHOSPHORYLATION_AND_ACTIVATION_OF_IKKS_COMPLEX | 2.084233 | 0.7189291 | 0.0014383 | 0.0785566 | 0.4550599 | 29 | S100B/NFKBIA/S100A12/NKIRAS2/TRAF6 |
| REACTOME_ADVANCED_GLYCOSYLATION_ENDPRODUCT_RECEPTOR_SIGNALING | 2.034035 | 0.8809841 | 0.0013110 | 0.0785566 | 0.4550599 | 11 | S100B/LGALS3/S100A12 |
| REACTOME_TRAF6_MEDIATED_NF_KB_ACTIVATION | 2.216092 | 0.8013799 | 0.0016318 | 0.0804660 | 0.4550599 | 22 | S100B/NFKBIA/S100A12/NKIRAS2/TRAF6 |
| REACTOME_MITOTIC_TELOPHASE_CYTOKINESIS | 1.853543 | 0.7906865 | 0.0016039 | 0.0804660 | 0.4550599 | 12 | KIF23/PLK1/WAPL/PDS5B/PDS5A/… |
| REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL | 1.929572 | 0.5127398 | 0.0021142 | 0.0953214 | 0.4317077 | 130 | TRAV8-4/KIR3DL1/IGLV3-21/IGKV4-1/IGLV3-1/… |
| REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S | -1.912835 | -0.5771269 | 0.0004972 | 0.0417958 | 0.4772708 | 52 | RPS27/EIF4EBP1/RPS2/RPS23/EIF4B/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -1.967008 | -0.5513496 | 0.0000034 | 0.0004815 | 0.6272567 | 99 | RPL13/RPL37A/RPS27/RPL10A/RPL4/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -1.987362 | -0.5469227 | 0.0000017 | 0.0003003 | 0.6435518 | 107 | RPL13/SEC61G/RPL37A/RPS27/RPL10A/… |
| REACTOME_SELENOAMINO_ACID_METABOLISM | -2.033909 | -0.5688141 | 0.0000008 | 0.0001575 | 0.6594444 | 98 | RPL13/RPL37A/RPL10A/RPL4/RPL27A/… |
| REACTOME_NONSENSE_MEDIATED_DECAY_NMD | -2.028055 | -0.5551614 | 0.0000004 | 0.0001098 | 0.6749629 | 110 | RPL13/RPL37A/RPS27/RPL10A/NCBP2/… |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -2.105282 | -0.5998711 | 0.0000004 | 0.0001098 | 0.6749629 | 86 | RPL13/RPL37A/RPL10A/RPL4/RPL27A/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | -2.057748 | -0.5802567 | 0.0000002 | 0.0000858 | 0.6901325 | 94 | RPL13/RPL37A/RPS27/RPL10A/RPL4/… |
| REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES | -1.859176 | -0.4578624 | 0.0000002 | 0.0000827 | 0.6901325 | 252 | RPL13/PSMB10/OGDH/RPL37A/PSMC3/… |
| REACTOME_REGULATION_OF_EXPRESSION_OF_SLITS_AND_ROBOS | -2.044104 | -0.5317577 | 0.0000001 | 0.0000459 | 0.7195128 | 150 | RPL13/PSMB10/RPL37A/PSMC3/RPS27/… |
| REACTOME_SIGNALING_BY_ROBO_RECEPTORS | -2.025743 | -0.5175251 | 0.0000000 | 0.0000221 | 0.7477397 | 180 | RPL13/PSMB10/VASP/RPL37A/PSMC3/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_CELL_SURFACE_INTERACTIONS_AT_THE_VASCULAR_WALL | 1.931837 | 0.5203953 | 0.0001405 | 0.0121494 | 0.5188481 | 114 | IGKV2D-30/IGHV2-70/JCHAIN/IGLV7-43/DOK2/… |
| REACTOME_ROLE_OF_PHOSPHOLIPIDS_IN_PHAGOCYTOSIS | 1.966460 | 0.5749386 | 0.0002815 | 0.0219183 | 0.4984931 | 71 | IGHG2/IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 1.955028 | 0.5939960 | 0.0004891 | 0.0346166 | 0.4772708 | 55 | IGHG2/IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION | 1.916228 | 0.5752834 | 0.0007124 | 0.0482238 | 0.4772708 | 60 | IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/IGKV1D-16/… |
| REACTOME_COMPLEMENT_CASCADE | 1.947265 | 0.5812776 | 0.0008428 | 0.0529726 | 0.4772708 | 64 | IGHG2/IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_FCGR_ACTIVATION | 1.871883 | 0.5619701 | 0.0011434 | 0.0642208 | 0.4550599 | 60 | IGHG2/IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_FCERI_MEDIATED_CA_2_MOBILIZATION | 1.862655 | 0.5404683 | 0.0011961 | 0.0642208 | 0.4550599 | 75 | IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/IGKV1D-16/… |
| REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS | 1.933300 | 0.5891708 | 0.0014435 | 0.0721250 | 0.4550599 | 53 | IGHG2/IGKV2D-30/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 1.897282 | 0.5867983 | 0.0017384 | 0.0820200 | 0.4550599 | 49 | IGKV2D-30/IGHV2-70/IGKV2D-28/IGHA1/IGKV1D-16/… |
| REACTOME_FCERI_MEDIATED_MAPK_ACTIVATION | 1.806300 | 0.5281123 | 0.0020414 | 0.0908137 | 0.4317077 | 71 | IGKV2D-30/JUN/IGHV2-70/IGLV7-43/IGKV2D-28/… |
| REACTOME_RESPIRATORY_ELECTRON_TRANSPORT | -1.884359 | -0.5378053 | 0.0000102 | 0.0010639 | 0.5933255 | 96 | COX16/COX6C/COX7B/NDUFA4/UQCR11/… |
| REACTOME_INFLUENZA_INFECTION | -1.864096 | -0.5015747 | 0.0000031 | 0.0003989 | 0.6272567 | 150 | RPS14/RPL8/RPL7A/POLR2I/RPS25/… |
| REACTOME_CELLULAR_RESPONSE_TO_STARVATION | -1.905033 | -0.5187183 | 0.0000014 | 0.0002366 | 0.6435518 | 136 | RPS14/RPL8/RPL7A/RPS25/RPL35A/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -2.074744 | -0.5865145 | 0.0000000 | 0.0000091 | 0.7195128 | 108 | RPS14/RPL8/RPL7A/RPS25/RPL35A/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | -2.090081 | -0.5983459 | 0.0000000 | 0.0000091 | 0.7195128 | 95 | RPS14/RPL7A/RPS25/RPL35A/RPS17/… |
| REACTOME_REGULATION_OF_EXPRESSION_OF_SLITS_AND_ROBOS | -2.041356 | -0.5502725 | 0.0000000 | 0.0000029 | 0.7477397 | 149 | RPS14/RPL8/RPL7A/RPS25/PSMD5/… |
| REACTOME_SELENOAMINO_ACID_METABOLISM | -2.128612 | -0.6085994 | 0.0000000 | 0.0000029 | 0.7477397 | 98 | RPS14/RPL8/RPL7A/RPS25/RPL35A/… |
| REACTOME_NONSENSE_MEDIATED_DECAY_NMD | -2.149436 | -0.6056000 | 0.0000000 | 0.0000021 | 0.7749390 | 111 | RPS14/RPL8/RPL7A/SMG8/RPS25/… |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -2.211749 | -0.6367378 | 0.0000000 | 0.0000021 | 0.7614608 | 87 | RPS14/RPL7A/RPS25/RPL35A/RPS17/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -2.184671 | -0.6246276 | 0.0000000 | 0.0000015 | 0.7881868 | 98 | RPS14/RPL8/RPL7A/RPS25/RPL35A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 2.127200 | 0.6453466 | 0.0000285 | 0.0148474 | 0.5756103 | 48 | IGLV3-21/IGKV2-30/IGHV1-2/IGKV3-20/IGHV3-48/… |
| REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS | 2.078025 | 0.6367109 | 0.0000274 | 0.0148474 | 0.5756103 | 45 | IGLV3-21/IGKV2-30/IGHV1-2/IGKV3-20/IGHV3-48/… |
| REACTOME_COMPLEMENT_CASCADE | 2.066804 | 0.5998780 | 0.0000640 | 0.0200087 | 0.5384341 | 57 | IGLV3-21/IGKV2-30/CLU/IGHV1-2/IGKV3-20/… |
| REACTOME_CD22_MEDIATED_BCR_REGULATION | 2.024266 | 0.6202391 | 0.0000586 | 0.0200087 | 0.5573322 | 45 | IGLV3-21/IGKV2-30/IGHV1-2/IGKV3-20/IGHV3-48/… |
| REACTOME_HDMS_DEMETHYLATE_HISTONES | 2.067104 | 0.6666766 | 0.0001782 | 0.0422035 | 0.5188481 | 36 | H3C2/H3C10/ARID5B/H3C7/KDM5D/… |
| REACTOME_FORMATION_OF_SENESCENCE_ASSOCIATED_HETEROCHROMATIN_FOCI_SAHF | 2.044491 | 0.8274335 | 0.0002370 | 0.0422035 | 0.5188481 | 13 | H1-4/H1-2/H1-3/ASF1A/H1-5 |
| REACTOME_INTERLEUKIN_7_SIGNALING | 2.005942 | 0.7153777 | 0.0002543 | 0.0422035 | 0.4984931 | 21 | H3C2/H3C10/H3C7/H3C3/H3C13/… |
| REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION | 1.955232 | 0.5872321 | 0.0003124 | 0.0422035 | 0.4984931 | 50 | IGLV3-21/IGKV2-30/IGHV1-2/IGKV3-20/IGHV3-48/… |
| REACTOME_FCGR_ACTIVATION | 1.950651 | 0.5858563 | 0.0003238 | 0.0422035 | 0.4984931 | 50 | IGLV3-21/IGKV2-30/IGHV1-2/IGKV3-20/IGHV3-48/… |
| REACTOME_PEPTIDE_LIGAND_BINDING_RECEPTORS | 1.960903 | 0.6488797 | 0.0008084 | 0.0602090 | 0.4772708 | 30 | CCR7/CXCR4/CCL3L3/POMC/CCR5/… |
| REACTOME_RAB_GERANYLGERANYLATION | -1.732910 | -0.6004349 | 0.0020598 | 0.0826044 | 0.4317077 | 33 | RAB4B/RAB7A/RAB2B/RAB8A/RAB39B/… |
| REACTOME_METABOLISM_OF_ANGIOTENSINOGEN_TO_ANGIOTENSINS | -1.734302 | -0.8540047 | 0.0017201 | 0.0747270 | 0.4550599 | 7 | ATP6AP2/CTSZ/AOPEP/ACE/GZMH |
| REACTOME_METABOLISM_OF_AMINO_ACIDS_AND_DERIVATIVES | -1.487242 | -0.3801159 | 0.0014347 | 0.0723806 | 0.4550599 | 245 | OAZ1/MTR/MTRR/SARDH/SEM1/… |
| REACTOME_REGULATION_OF_EXPRESSION_OF_SLITS_AND_ROBOS | -1.586072 | -0.4296856 | 0.0010839 | 0.0627842 | 0.4550599 | 147 | NCBP2/SEM1/RPL10/RPL7A/RPL14/… |
| REACTOME_SIGNALING_BY_ROBO_RECEPTORS | -1.586987 | -0.4205260 | 0.0008839 | 0.0627842 | 0.4772708 | 173 | NCBP2/VASP/SEM1/RPL10/RPL7A/… |
| REACTOME_NUCLEOTIDE_EXCISION_REPAIR | -1.674600 | -0.4730561 | 0.0010321 | 0.0627842 | 0.4550599 | 104 | USP7/RFC2/RAD23B/RFC3/GTF2H2/… |
| REACTOME_ASSOCIATION_OF_TRIC_CCT_WITH_TARGET_PROTEINS_DURING_BIOSYNTHESIS | -1.826008 | -0.6527378 | 0.0010579 | 0.0627842 | 0.4550599 | 28 | LONP2/CCT2/CCT4/DCAF7/CCNE2/… |
| REACTOME_SIGNALING_BY_MET | -1.834942 | -0.6195994 | 0.0007180 | 0.0563916 | 0.4772708 | 37 | RAB4B/PTPN2/CBL/RANBP9/NRAS/… |
| REACTOME_TRANSLATION | -1.498196 | -0.3771111 | 0.0005766 | 0.0548477 | 0.4772708 | 277 | MRPL35/MRPL38/APEH/SSR1/MRPS16/… |
| REACTOME_NERVOUS_SYSTEM_DEVELOPMENT | -1.556695 | -0.3854789 | 0.0002612 | 0.0422035 | 0.4984931 | 320 | DLG1/NCBP2/VASP/APH1B/DLG3/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_COMPLEMENT_CASCADE | 2.328440 | 0.7095176 | 0.0000001 | 0.0001199 | 0.7049757 | 61 | C1QC/C1QB/CLU/C1QA/IGHV1-2/… |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 2.334721 | 0.7274181 | 0.0000004 | 0.0003283 | 0.6749629 | 51 | C1QC/C1QB/C1QA/IGHV1-2/IGLV3-21/… |
| REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS | 2.300390 | 0.7276567 | 0.0000010 | 0.0004966 | 0.6594444 | 47 | C1QC/C1QB/C1QA/IGHV1-2/IGLV3-21/… |
| REACTOME_INTERFERON_SIGNALING | 1.912553 | 0.5074714 | 0.0000025 | 0.0009754 | 0.6272567 | 158 | FCGR1A/IFI27/IFITM2/HERC5/MT2A/… |
| REACTOME_INTERFERON_ALPHA_BETA_SIGNALING | 2.138732 | 0.6781072 | 0.0000342 | 0.0089123 | 0.5573322 | 46 | IFITM2/IFITM3/IFITM1/RSAD2/IFI6/… |
| REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL | 1.946263 | 0.5333351 | 0.0000416 | 0.0092853 | 0.5573322 | 122 | FCGR1A/IFITM1/IGHV1-2/IGLV3-21/IGLV3-1/… |
| REACTOME_BINDING_AND_UPTAKE_OF_LIGANDS_BY_SCAVENGER_RECEPTORS | 1.933008 | 0.5894729 | 0.0001498 | 0.0208972 | 0.5188481 | 59 | MARCO/IGHV1-2/IGLV3-21/IGLV3-1/HP/… |
| REACTOME_FCGR_ACTIVATION | 1.975826 | 0.6105527 | 0.0002215 | 0.0266277 | 0.5188481 | 52 | FCGR1A/IGHV1-2/IGLV3-21/IGLV3-1/IGLV3-19/… |
| REACTOME_FORMATION_OF_FIBRIN_CLOT_CLOTTING_CASCADE | 1.853646 | 0.8910449 | 0.0004117 | 0.0459580 | 0.4984931 | 8 | SERPING1/PF4/F5/F13A1 |
| REACTOME_ANTI_INFLAMMATORY_RESPONSE_FAVOURING_LEISHMANIA_PARASITE_INFECTION | 1.714911 | 0.4837702 | 0.0005785 | 0.0602809 | 0.4772708 | 104 | FCGR1A/ADM/IGHV1-2/IGLV3-21/IGLV3-1/… |
| REACTOME_M_PHASE | -1.438456 | -0.3445277 | 0.0008100 | 0.0744759 | 0.4772708 | 238 | AAAS/PPP2R2D/SDCCAG8/AHCTF1/HAUS1/… |
| REACTOME_GLOBAL_GENOME_NUCLEOTIDE_EXCISION_REPAIR_GG_NER | -1.817091 | -0.5111542 | 0.0001604 | 0.0208972 | 0.5188481 | 74 | POLD2/XPC/PCNA/POLD3/UBE2N/… |
| REACTOME_RHOBTB_GTPASE_CYCLE | -2.002547 | -0.6679343 | 0.0000925 | 0.0160571 | 0.5384341 | 30 | CCT2/VIM/HNRNPC/STK38/COPS2/… |
| REACTOME_RHOBTB1_GTPASE_CYCLE | -2.056884 | -0.7621998 | 0.0000516 | 0.0100903 | 0.5573322 | 19 | CCT2/VIM/HNRNPC/STK38/COPS2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_INTERFERON_SIGNALING | 2.208966 | 0.5825714 | 0.0e+00 | 0.0000496 | 0.7195128 | 142 | IFI27/IFIT1/IFITM1/IFI6/ISG15/… |
| REACTOME_INTERFERON_ALPHA_BETA_SIGNALING | 2.520403 | 0.7853826 | 1.7e-06 | 0.0013139 | 0.6435518 | 42 | IFIT1/IFITM1/IFI6/ISG15/MX1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_INTERFERON_SIGNALING | 2.321979 | 0.6088425 | 0.0000000 | 0.0000172 | 0.7477397 | 138 | IFI27/IFI6/IFITM1/IFITM3/XAF1/… |
| REACTOME_INTERFERON_ALPHA_BETA_SIGNALING | 2.543723 | 0.7886879 | 0.0000001 | 0.0000974 | 0.6901325 | 43 | IFI6/IFITM1/IFITM3/XAF1/MX1/… |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 2.207567 | 0.8119057 | 0.0000623 | 0.0316444 | 0.5384341 | 19 | IGLV1-40/C1QB/IGLV3-21/IGKV3-20/FCN1/… |
| REACTOME_COMPLEMENT_CASCADE | 2.266258 | 0.7768909 | 0.0001013 | 0.0385509 | 0.5384341 | 24 | IGLV1-40/C1QB/IGLV3-21/IGKV3-20/FCN1/… |
| REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 1.593366 | 0.3619088 | 0.0001716 | 0.0522715 | 0.5188481 | 386 | IFI27/IFI6/IFITM1/IFITM3/XAF1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy; a negative NES indicates it is enriched in Healthy relative to Covid_Mild.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_GRAFT_VERSUS_HOST_DISEASE | 2.014249 | 0.6957889 | 0.0003162 | 0.0292455 | 0.4984931 | 32 | FASLG/KIR3DL1/HLA-DRB5/HLA-DOB/HLA-DRB1/… |
| KEGG_RIBOSOME | -2.052827 | -0.5831366 | 0.0000021 | 0.0003909 | 0.6272567 | 84 | RPL13/RPL37A/RPS27/RPL10A/RPL4/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_ALZHEIMERS_DISEASE | -1.559612 | -0.4242751 | 0.0031687 | 0.0982283 | 0.4317077 | 124 | COX6C/COX7B/NDUFA4/TNF/UQCR11/… |
| KEGG_OXIDATIVE_PHOSPHORYLATION | -1.603407 | -0.4425057 | 0.0023696 | 0.0982283 | 0.4317077 | 112 | COX6C/COX7B/NDUFA4/UQCR11/COX7C/… |
| KEGG_ASTHMA | -1.722090 | -0.6940613 | 0.0027546 | 0.0982283 | 0.4317077 | 16 | TNF/HLA-DPB1/HLA-DRA |
| KEGG_PARKINSONS_DISEASE | -1.735147 | -0.4814700 | 0.0002654 | 0.0164542 | 0.4984931 | 106 | COX6C/COX7B/NDUFA4/UQCR11/COX7C/… |
| KEGG_CARDIAC_MUSCLE_CONTRACTION | -1.992528 | -0.7030220 | 0.0000481 | 0.0044722 | 0.5573322 | 30 | COX6C/COX7B/UQCR11/COX7C/ATP1B3/… |
| KEGG_RIBOSOME | -2.254719 | -0.6477164 | 0.0000000 | 0.0000007 | 0.7614608 | 84 | RPL8/RPL7A/RPS25/RPL35A/RPS17/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 1.894826 | 0.534839 | 0.0003954 | 0.0731415 | 0.4984931 | 71 | H3C2/H3C10/H3C7/H3C3/HLA-DRB5/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 2.272672 | 0.6778046 | 3.00e-07 | 0.0000508 | 0.6749629 | 65 | C1QC/C1QB/FCGR1A/C1QA/CD40/… |
| KEGG_COMPLEMENT_AND_COAGULATION_CASCADES | 2.074830 | 0.8554510 | 6.37e-05 | 0.0059279 | 0.5384341 | 15 | C1QC/C1QA/SERPING1/CR1/F5/… |
| KEGG_PRION_DISEASES | 2.036097 | 0.7755857 | 1.67e-04 | 0.0103536 | 0.5188481 | 21 | C1QC/C1QB/C1QA/IL1B/CCL5/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_HUNTINGTONS_DISEASE | -1.683514 | -0.4568229 | 0.0005711 | 0.0303615 | 0.4772708 | 125 | SDHB/NDUFS4/APAF1/NDUFB9/NDUFS7/… |
| KEGG_OXIDATIVE_PHOSPHORYLATION | -1.691128 | -0.4645116 | 0.0006600 | 0.0303615 | 0.4772708 | 106 | SDHB/NDUFS4/NDUFB9/NDUFS7/NDUFB7/… |
| KEGG_PARKINSONS_DISEASE | -1.725441 | -0.4776757 | 0.0005921 | 0.0303615 | 0.4772708 | 102 | SDHB/NDUFS4/APAF1/UBE2J2/NDUFB9/… |
| KEGG_LYSOSOME | -1.859359 | -0.5351416 | 0.0000448 | 0.0082472 | 0.5573322 | 83 | LAPTM4A/LAPTM5/AP3B1/CD63/GNS/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy; a negative NES indicates it is enriched in Healthy relative to Covid_Mild.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| SMCHD1_TARGET_GENES | 1.922524 | 0.6866805 | 0.0008621 | 0.0296569 | 0.4772708 | 25 | NBPF1/TRGV5P/LERFS/TRAV10/MT-ND6/… |
| HOXC11_TARGET_GENES | 2.152064 | 0.7293295 | 0.0010486 | 0.0338166 | 0.4550599 | 32 | S100B/DUSP6 |
| ZNF101_TARGET_GENES | -1.436313 | -0.3600726 | 0.0062072 | 0.0889699 | 0.4070179 | 190 | NEMF/CNEP1R1/FBXO8/SNRPD1/ENTPD1-AS1/… |
| PCGF1_TARGET_GENES | -1.380740 | -0.3270025 | 0.0045587 | 0.0823864 | 0.4070179 | 337 | RCOR3/ZNF280D/DCTN1/PRKAG2/GK5/… |
| ZBED4_TARGET_GENES | -1.486954 | -0.3917365 | 0.0033962 | 0.0700976 | 0.4317077 | 139 | MFSD11/SEC61G/VPS26C/FAM53C/ETS2/… |
| FOXE1_TARGET_GENES | -1.386887 | -0.3170301 | 0.0005339 | 0.0229570 | 0.4772708 | 508 | MFSD11/RPL13/BABAM1/ATF5/IER2/… |
| DLX6_TARGET_GENES | -1.482054 | -0.3440281 | 0.0001914 | 0.0123457 | 0.5188481 | 429 | GMPR2/NEMF/CSNK2B/COX7A2L/VASP/… |
| PHF2_TARGET_GENES | -1.370358 | -0.2998527 | 0.0000684 | 0.0058824 | 0.5384341 | 1016 | RPL13/RCOR3/POP7/GALNS/CSNK2B/… |
| RFX7_TARGET_GENES | -1.554477 | -0.3653217 | 0.0000481 | 0.0049651 | 0.5573322 | 379 | GMPR2/POP7/C12orf4/ATF5/NEMF/… |
| NKX2_5_TARGET_GENES | -1.425889 | -0.3148294 | 0.0000334 | 0.0043071 | 0.5573322 | 830 | MFSD11/METTL3/COX7A2L/PALLD/FBXO8/… |
| ELF2_TARGET_GENES | -1.381361 | -0.2999203 | 0.0000063 | 0.0016220 | 0.6105269 | 1206 | BABAM1/C12orf4/IER2/FBXO8/CHFR/… |
| NFE2L1_TARGET_GENES | -1.428544 | -0.3084065 | 0.0000002 | 0.0000780 | 0.6901325 | 1398 | GMPR2/MFSD11/RMC1/BABAM1/BTG2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| ZNF236_TARGET_GENES | -1.479432 | -0.3940172 | 0.0044454 | 0.0997317 | 0.4070179 | 156 | MEF2A/COX6C/SLX4IP/TSPAN31/MALAT1/… |
| MCM3_TARGET_GENES | -1.546546 | -0.4270079 | 0.0032270 | 0.0756886 | 0.4317077 | 115 | FTSJ3/SLX4IP/ZNF432/MRPL52/DEDD/… |
| ZNF197_TARGET_GENES | -1.395317 | -0.3383996 | 0.0030195 | 0.0741930 | 0.4317077 | 376 | BZW2/DHX40/SLX4IP/NDUFA4/SIPA1L3/… |
| TERF2_TARGET_GENES | -1.589418 | -0.4595987 | 0.0024678 | 0.0636705 | 0.4317077 | 87 | TMBIM1/ATP5MJ/C1orf74/NF1/TBL1XR1/… |
| CIITA_TARGET_GENES | -1.348953 | -0.3065996 | 0.0003165 | 0.0183378 | 0.4984931 | 970 | N4BP1/FTSJ3/SLX4IP/ZNF432/BTN2A2/… |
| FXR1_TARGET_GENES | -1.375623 | -0.3175854 | 0.0003198 | 0.0183378 | 0.4984931 | 716 | COX16/CNOT8/FTSJ3/NDUFA4/SIPA1L3/… |
| BANP_TARGET_GENES | -1.432976 | -0.3341123 | 0.0002902 | 0.0183378 | 0.4984931 | 573 | COX7B/PPM1G/SLX4IP/SPTY2D1/OFD1/… |
| RUVBL1_TARGET_GENES | -1.730227 | -0.4706364 | 0.0000899 | 0.0115936 | 0.5384341 | 132 | RPS14/FTSJ3/SLX4IP/CCDC47/FMC1/… |
| NFRKB_TARGET_GENES | -1.362722 | -0.3038247 | 0.0000094 | 0.0024168 | 0.5933255 | 1489 | MEF2A/COX16/FTSJ3/COX7B/PRMT1/… |
| DLX4_TARGET_GENES | -1.523407 | -0.3536348 | 0.0000055 | 0.0024168 | 0.6105269 | 648 | COX16/FTSJ3/PAIP2B/NDUFA4/CSPP1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| PSMB5_TARGET_GENES | 1.448105 | 0.3486876 | 0.0042410 | 0.0989136 | 0.4070179 | 214 | H1-4/H1-2/H3C10/PMAIP1/DDIT4/… |
| ZNF213_TARGET_GENES | -1.388793 | -0.3375391 | 0.0041518 | 0.0989136 | 0.4070179 | 384 | DNAJB12/KXD1/GLYR1/GLOD4/RFX1/… |
| IRF5_TARGET_GENES | -1.368287 | -0.3250624 | 0.0037916 | 0.0978228 | 0.4317077 | 538 | RAB4B/COMMD1/RABL2B/CREBZF/RFC3/… |
| NPM1_TARGET_GENES | -1.412006 | -0.3529889 | 0.0036984 | 0.0978228 | 0.4317077 | 275 | SEPTIN2/CSPP1/IREB2/DUSP12/COA1/… |
| PITX1_TARGET_GENES | -1.459145 | -0.3664280 | 0.0026417 | 0.0801821 | 0.4317077 | 258 | RAB4B/GUSB/DNAJB12/COMMD1/ATG9A/… |
| GLI3_TARGET_GENES | -1.375675 | -0.3260530 | 0.0021700 | 0.0699816 | 0.4317077 | 546 | RAB4B/DLG1/EDC3/MUTYH/SEPTIN2/… |
| NFE2L3_TARGET_GENES | -1.720815 | -0.9286220 | 0.0010710 | 0.0425119 | 0.4550599 | 5 | TEP1 |
| ARID5B_TARGET_GENES | -1.380678 | -0.3215926 | 0.0007853 | 0.0377204 | 0.4772708 | 690 | GUSB/CRBN/XRCC5/ZNF37A/PLD3/… |
| GUCY1B1_TARGET_GENES | -1.422982 | -0.3383012 | 0.0006488 | 0.0371979 | 0.4772708 | 509 | GUSB/PLD3/DRAM2/WASHC5/ADCK1/… |
| ZNF2_TARGET_GENES | -1.403596 | -0.3256464 | 0.0002489 | 0.0256816 | 0.4984931 | 757 | DNAJB12/CRBN/NCBP2/TBL3/CCT2/… |
| TAF9B_TARGET_GENES | -1.529550 | -0.3690157 | 0.0000568 | 0.0146545 | 0.5573322 | 425 | MRPL35/COMMD1/SEPTIN2/IMMP1L/WDR74/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| PSMB5_TARGET_GENES | 1.655767 | 0.4197568 | 0.0003547 | 0.0382911 | 0.4984931 | 219 | ID1/ID2/SNHG5/ADM/HSPA1B/… |
| ZNF592_TARGET_GENES | -1.215365 | -0.2552286 | 0.0014816 | 0.0707716 | 0.4550599 | 1253 | AAAS/PPP2R2D/PDIA5/CALCOCO1/GOSR2/… |
| BARX2_TARGET_GENES | -1.232782 | -0.2586457 | 0.0009855 | 0.0707716 | 0.4550599 | 1292 | TRIM26/WBP11/CCT2/RAB11B-AS1/RAD51B/… |
| SUPT20H_TARGET_GENES | -1.254619 | -0.2653314 | 0.0008977 | 0.0707716 | 0.4772708 | 1025 | ILF3-DT/TMEM106B/AAAS/WBP11/RAB11B-AS1/… |
| E2F5_TARGET_GENES | -1.261834 | -0.2719733 | 0.0016490 | 0.0707716 | 0.4550599 | 782 | C19orf53/PTP4A3/STAT5B/C11orf21/RAD51B/… |
| INSM2_TARGET_GENES | -1.348438 | -0.2996003 | 0.0013726 | 0.0707716 | 0.4550599 | 500 | ILF3-DT/CCT2/RAB11B-AS1/WARS1/ARPP19/… |
| NPM1_TARGET_GENES | -1.408016 | -0.3307216 | 0.0016186 | 0.0707716 | 0.4550599 | 281 | ABCD4/COPA/YBX1/THOC1/NFATC3/… |
| E2F2_TARGET_GENES | -1.267328 | -0.2679354 | 0.0003718 | 0.0382911 | 0.4984931 | 1040 | C19orf53/ILF3-DT/FRAT1/AIFM1/ITSN1/… |
| ZNF830_TARGET_GENES | -1.641045 | -0.4030440 | 0.0001248 | 0.0214185 | 0.5188481 | 196 | ABCD4/EIF6/THOC1/NDUFB7/ZSCAN30/… |
| ZNF407_TARGET_GENES | -1.279812 | -0.2668954 | 0.0000466 | 0.0163953 | 0.5573322 | 1396 | ILF3-DT/TMEM106B/AAAS/SYVN1/ITSN1/… |
| ELF2_TARGET_GENES | -1.305142 | -0.2747181 | 0.0000637 | 0.0163953 | 0.5384341 | 1167 | ZUP1/RAB11B-AS1/MFN1/DPH3/GOSR2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| NFE2L1_TARGET_GENES | -1.415033 | -0.3228922 | 8.2e-06 | 0.0041929 | 0.5933255 | 1023 | KIDINS220/CHCHD3/CAP1/TRMT112/TNPO3/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Healthy. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Mild and Healthy for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Mild relative to Healthy, and vice-versa.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| SCGGAAGY_ELK1_02 | -1.332276 | -0.2913055 | 0.0001058 | 0.0645449 | 0.5384341 | 1074 | GMPR2/BABAM1/CSNK2B/YTHDF2/CCP110/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KRCTCNNNNMANAGC_UNKNOWN | 2.460644 | 0.7779709 | 0.00e+00 | 0.0000280 | 0.7195128 | 37 | H1-4/H1-2/H3C2/H3C10/H3C7/… |
| TTTNNANAGCYR_UNKNOWN | 2.032007 | 0.5781782 | 3.45e-05 | 0.0105254 | 0.5573322 | 65 | H1-4/H1-2/H3C2/H3C10/H3C7/… |
| SCGGAAGY_ELK1_02 | -1.332961 | -0.3048625 | 3.16e-04 | 0.0642453 | 0.4984931 | 1042 | RAB4B/PTPN2/KXD1/PPP1R35/UFD1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| FREAC4_01 | 1.856993 | 0.5632482 | 0.0007946 | 0.0973142 | 0.4772708 | 62 | ID2/CDKN1A/CAMK1D/CITED2/CTCF/… |
| GATA_Q6 | 1.854005 | 0.5680466 | 0.0005867 | 0.0973142 | 0.4772708 | 60 | CLU/ADM/LCAT/CACNA2D3/STON2/… |
| E12_Q6 | 1.794529 | 0.4997787 | 0.0004455 | 0.0973142 | 0.4984931 | 105 | C1QC/CAMK1D/FAM53C/MIR22HG/ACAP1/… |
| E47_01 | 1.710654 | 0.4642730 | 0.0007977 | 0.0973142 | 0.4772708 | 123 | C1QC/CAMK1D/CCNYL1/EPHB2/PPTC7/… |
| MYOD_01 | 1.677834 | 0.4629032 | 0.0009672 | 0.0983319 | 0.4772708 | 114 | C1QC/FAM53C/MIR22HG/TICAM1/ACAP1/… |
| CCCNNNNNNAAGWT_UNKNOWN | -1.754053 | -0.5005989 | 0.0002481 | 0.0973142 | 0.4984931 | 71 | IRF9/NCDN/CPEB4/DHRS9/MRPL49/… |
Here we check number of differentially expressed genes (DEGs) after applying some statistical thresholds:
Adjusted P value < 0.1
Absolute Log Fold Change > 0 (i.e., any nonzero fold change, in either direction)
Proportion of cells expressing gene in condition A > 0.1 if LFC is positive
Proportion of cells expressing gene in condition B > 0.1 if LFC is negative
Differential expression compares genes in condition A (left) versus condition B (right). The left column indicates the number of genes upregulated in the left condition, while the right column indicates genes upregulated in the right condition.
Finally, the thresholds above do not affect downstream results from pathway analysis; they are only used to count the number of DEGs. Downstream usage of DEGs can use these thresholds, or you can choose other appropriate cutoffs.
| Cluster | Covid_Critical_high | Covid_Mild_high |
|---|---|---|
| cluster_1 | 193 | 66 |
| cluster_2 | 4 | 1 |
| cluster_3 | 9 | 5 |
| cluster_4 | 60 | 51 |
| cluster_5 | 0 | 0 |
| cluster_6 | 6 | 5 |
| cluster_7 | 29 | 11 |
To ensure the differential expression results are robust, it is helpful to inspect all DEGs by visualizing them in a heatmap. For each cluster, we compare the DEGs at the single-cell level and at either the pseudobulk level (if we used pseudobulk_edgeR) or the average RISC value level (if using wilcox).
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for all cells in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Here, we plot all significant DEGs for pseudobulked replicates in this cluster from condition A vs condition B. Condition A is denoted with positive fold change.
Pathway analysis for the cross-condition analysis is performed for overexpressed and underexpressed genes for each cluster. This is done via Gene Set Enrichment Analysis (GSEA) (Subramanian et al., 2005).
GSEA is preferred over other pathway analysis methods, such as Fisher's exact tests or chi-square tests, because it does not require making arbitrary cutoffs to the number of DEGs and takes into account how strongly differentially expressed each gene may be. For the latter, a data-driven, gene-specific weight is applied. We use a standard weighting method of -log10(P-value) * sign of Log Fold Change.
The pathways we choose in pathway analysis are derived from the Molecular Signatures Database (MSigDB), where they are organized into categories, such as Gene Ontology (GO) Biological Process, GO Molecular Function, KEGG, Reactome, etc. These are databases that annotate genes by function or molecular pathway.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild, and vice-versa.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_E2F_TARGETS | 2.737946 | 0.7895349 | 0.0000000 | 0.0000000 | 1.4885397 | 192 | CENPM/MCM4/TK1/RRM2/MKI67/… |
| HALLMARK_G2M_CHECKPOINT | 2.544265 | 0.7445422 | 0.0000000 | 0.0000000 | 1.2210538 | 172 | GINS2/MKI67/MCM2/STMN1/CKS2/… |
| HALLMARK_MYC_TARGETS_V1 | 2.075492 | 0.5995023 | 0.0000000 | 0.0000000 | 0.7749390 | 190 | TYMS/MCM4/MCM2/MCM7/DUT/… |
| HALLMARK_MTORC1_SIGNALING | 1.817999 | 0.5273447 | 0.0000042 | 0.0000530 | 0.6105269 | 180 | MCM4/RRM2/MCM2/DHFR/SHMT2/… |
| HALLMARK_MITOTIC_SPINDLE | 1.705572 | 0.5017886 | 0.0001141 | 0.0011410 | 0.5384341 | 164 | LMNB1/BIRC5/TOP2A/SMC4/YWHAE/… |
| HALLMARK_APOPTOSIS | 1.747477 | 0.5305002 | 0.0001396 | 0.0011632 | 0.5188481 | 126 | HMGB2/TOP2A/BAX/GSTM1/CD38/… |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.810981 | 0.5915113 | 0.0004493 | 0.0032092 | 0.4984931 | 75 | PRSS23/FABP5/RBBP8/FOS/HPRT1/… |
| HALLMARK_SPERMATOGENESIS | 1.786145 | 0.6065984 | 0.0010041 | 0.0062753 | 0.4550599 | 55 | CDKN3/NCAPH/TKTL1/KIF2C/CCNB2/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.689822 | 0.5688202 | 0.0018845 | 0.0104697 | 0.4550599 | 60 | LGALS1/JUN/MAGEE1/GLIPR1/AREG/… |
| HALLMARK_MYC_TARGETS_V2 | 1.724708 | 0.5889784 | 0.0045457 | 0.0227287 | 0.4070179 | 52 | MCM4/PLK1/MCM5/PLK4/UNG/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 1.730964 | 0.4704992 | 0.0001438 | 0.0071898 | 0.5188481 | 130 | RHOB/JUN/FOS/TNIP2/IRF1/… |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | 1.579154 | 0.4599315 | 0.0033237 | 0.0830925 | 0.4317077 | 85 | IFITM3/IRF1/WARS1/IFI30/IFIT3/… |
| HALLMARK_CHOLESTEROL_HOMEOSTASIS | 1.597827 | 0.4965563 | 0.0083353 | 0.0912447 | 0.3807304 | 57 | FABP5/ATF3/PMVK/FDPS/SCD/… |
| HALLMARK_MYOGENESIS | 1.572948 | 0.4668065 | 0.0070259 | 0.0912447 | 0.4070179 | 74 | CFD/PTP4A3/PLXNB2/FXYD1/ACSL1/… |
| HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION | 1.547222 | 0.4865862 | 0.0122976 | 0.0912447 | 0.3807304 | 51 | RHOB/EMP3/JUN/LGALS1/P3H1/… |
| HALLMARK_IL2_STAT5_SIGNALING | 1.479236 | 0.4056574 | 0.0111477 | 0.0912447 | 0.3807304 | 118 | IFITM3/RHOB/COCH/SOCS1/KLF6/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.391599 | 0.3679859 | 0.0127743 | 0.0912447 | 0.3807304 | 164 | IFITM3/IRF1/MTHFD2/WARS1/IFI30/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 1.908383 | 0.5229669 | 0.0000018 | 0.0000917 | 0.6435518 | 126 | IFNGR2/FOS/SGK1/JUN/SOCS3/… |
| HALLMARK_IL2_STAT5_SIGNALING | 1.764689 | 0.4796179 | 0.0000243 | 0.0006084 | 0.5756103 | 134 | IFITM3/SOCS2/PIM1/CISH/PTGER2/… |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.687963 | 0.4452124 | 0.0001215 | 0.0020244 | 0.5384341 | 157 | IFITM3/PIM1/IFIT1/SOCS3/TNFAIP3/… |
| HALLMARK_HYPOXIA | 1.788345 | 0.5047328 | 0.0002017 | 0.0020807 | 0.5188481 | 97 | PIM1/FOS/JUN/TNFAIP3/HK2/… |
| HALLMARK_ALLOGRAFT_REJECTION | 1.659341 | 0.4509858 | 0.0002081 | 0.0020807 | 0.5188481 | 134 | IFNGR2/EIF5A/CD40LG/GZMA/SOCS1/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.685743 | 0.4751696 | 0.0007592 | 0.0063266 | 0.4772708 | 101 | IFNGR2/PTGER2/CCL5/SLC31A2/TNFRSF1B/… |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | 1.754310 | 0.5574797 | 0.0026213 | 0.0187235 | 0.4317077 | 49 | PIM1/IFNGR2/JUN/SOCS3/SOCS1/… |
| HALLMARK_ESTROGEN_RESPONSE_LATE | 1.656993 | 0.4950897 | 0.0033919 | 0.0188437 | 0.4317077 | 68 | FOS/CISH/SGK1/AREG/SIAH2/… |
| HALLMARK_ESTROGEN_RESPONSE_EARLY | 1.547493 | 0.4580171 | 0.0071503 | 0.0325014 | 0.4070179 | 71 | FOS/UGCG/AREG/SIAH2/PPIF/… |
| HALLMARK_SPERMATOGENESIS | 1.568080 | 0.5051500 | 0.0133072 | 0.0554466 | 0.3807304 | 47 | PHF7/CCNB2/SPATA6/PCSK1N/GSTM3/… |
| HALLMARK_DNA_REPAIR | -1.477229 | -0.3700879 | 0.0057255 | 0.0286277 | 0.4070179 | 133 | TSG101/GTF2A2/POLR2G/POLR3C/NFX1/… |
| HALLMARK_PROTEIN_SECRETION | -1.608506 | -0.4373577 | 0.0032887 | 0.0188437 | 0.4317077 | 86 | TSG101/VAMP3/M6PR/ATP7A/GNAS/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HALLMARK_INTERFERON_GAMMA_RESPONSE | 1.918113 | 0.4947319 | 0.0000008 | 0.0000398 | 0.6594444 | 171 | CXCL10/CD86/PIM1/CXCL9/HLA-DRB1/… |
| HALLMARK_IL2_STAT5_SIGNALING | 1.989652 | 0.5280380 | 0.0000020 | 0.0000494 | 0.6272567 | 131 | CXCL10/SLC39A8/CD86/PIM1/CCND3/… |
| HALLMARK_ALLOGRAFT_REJECTION | 1.785129 | 0.4730931 | 0.0001526 | 0.0025437 | 0.5188481 | 134 | FCGR2B/CD86/CCND3/CXCL9/HLA-DMB/… |
| HALLMARK_G2M_CHECKPOINT | 1.693166 | 0.4408433 | 0.0002619 | 0.0032734 | 0.4984931 | 156 | BIRC5/MKI67/PTTG1/TROAP/NOTCH2/… |
| HALLMARK_INFLAMMATORY_RESPONSE | 1.641244 | 0.4393283 | 0.0011885 | 0.0084889 | 0.4550599 | 124 | CXCL10/CD82/CXCL9/GNA15/PTGER2/… |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | 1.639965 | 0.4295291 | 0.0009050 | 0.0084889 | 0.4772708 | 149 | CXCL10/TUBB2A/KLF10/DUSP2/ATP2B1/… |
| HALLMARK_E2F_TARGETS | 1.593703 | 0.4101575 | 0.0010971 | 0.0084889 | 0.4550599 | 174 | BIRC5/MKI67/PTTG1/CDKN3/HMGB3/… |
| HALLMARK_KRAS_SIGNALING_UP | 1.671064 | 0.4623526 | 0.0026793 | 0.0167456 | 0.4317077 | 101 | RETN/CXCL10/CD37/MMD/GYPC/… |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | 1.589935 | 0.4842787 | 0.0135668 | 0.0753711 | 0.3807304 | 61 | CXCL10/ITGA4/PIM1/CXCL9/CD9/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild, and vice-versa.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_PROTEIN_DNA_COMPLEX_ASSEMBLY | 2.277259 | 0.6811043 | 0.0000000 | 0.0000000 | 0.9214260 | 148 | H3C2/CENPN/MCM2/ASF1B/CDC45/… |
| GOBP_DNA_CONFORMATION_CHANGE | 2.166534 | 0.6164765 | 0.0000000 | 0.0000000 | 0.9214260 | 237 | H3C2/GINS2/MCM4/MCM2/ASF1B/… |
| GOBP_NUCLEOSOME_ASSEMBLY | 2.314559 | 0.7487408 | 0.0000000 | 0.0000000 | 0.8634154 | 83 | H3C2/MCM2/ASF1B/HMGB2/CENPW/… |
| GOBP_MEIOTIC_CELL_CYCLE | 2.218469 | 0.6631053 | 0.0000000 | 0.0000000 | 0.8390889 | 142 | CKS2/PKMYT1/CDC20/RAD51/NCAPH/… |
| GOBP_PROTEIN_DNA_COMPLEX_SUBUNIT_ORGANIZATION | 2.132524 | 0.6188807 | 0.0000000 | 0.0000001 | 0.8140358 | 186 | H3C2/CENPN/MCM2/ASF1B/CDC45/… |
| GOBP_DNA_PACKAGING | 2.140101 | 0.6272248 | 0.0000000 | 0.0000002 | 0.8140358 | 173 | H3C2/MCM2/ASF1B/HMGB2/NCAPH/… |
| GOBP_NUCLEOSOME_ORGANIZATION | 2.184606 | 0.6615645 | 0.0000000 | 0.0000003 | 0.8012156 | 125 | H3C2/MCM2/ASF1B/HMGB2/CENPW/… |
| GOBP_DNA_REPLICATION_INITIATION | 2.275093 | 0.8575246 | 0.0000000 | 0.0000008 | 0.7749390 | 31 | MCM4/MCM2/MCM7/CDC45/MCM10/… |
| GOBP_CELL_CYCLE_DNA_REPLICATION | 2.221539 | 0.8091916 | 0.0000000 | 0.0000092 | 0.7195128 | 39 | MCM4/MCM2/PCNA/CDC45/RAD51/… |
| GOBP_DNA_UNWINDING_INVOLVED_IN_DNA_REPLICATION | 2.177817 | 0.8761540 | 0.0000002 | 0.0000477 | 0.6901325 | 22 | GINS2/MCM4/MCM2/MCM7/CDC45/… |
| GOBP_REGULATION_OF_ACTIN_NUCLEATION | -1.865035 | -0.7056101 | 0.0009177 | 0.0549241 | 0.4772708 | 20 | WAS/WASF2/AP1AR/ARFIP1/CYFIP1/… |
| GOBP_URATE_METABOLIC_PROCESS | -1.584348 | -0.9831093 | 0.0003745 | 0.0271984 | 0.4984931 | 3 | PRPS1/ABCG2 |
| GOBP_REGULATION_OF_ARP2_3_COMPLEX_MEDIATED_ACTIN_NUCLEATION | -1.963158 | -0.7789338 | 0.0003658 | 0.0268287 | 0.4984931 | 16 | WAS/WASF2/AP1AR/ARFIP1/CYFIP1/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 2.133270 | 0.5206155 | 0.0000000 | 0.0000000 | 1.0476265 | 358 | IGHV5-10-1/IGHV3-49/IGKV6-21/IGLV4-3/IGLV5-37/… |
| GOBP_REGULATION_OF_B_CELL_ACTIVATION | 2.344807 | 0.6307548 | 0.0000000 | 0.0000000 | 0.9865463 | 150 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_COMPLEMENT_ACTIVATION | 2.519309 | 0.7459768 | 0.0000000 | 0.0000000 | 0.9436322 | 70 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_CIRCULATING_IMMUNOGLOBULIN | 2.487493 | 0.7325738 | 0.0000000 | 0.0000000 | 0.9436322 | 74 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_POSITIVE_REGULATION_OF_B_CELL_ACTIVATION | 2.354249 | 0.6602114 | 0.0000000 | 0.0000000 | 0.9101197 | 106 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_B_CELL_RECEPTOR_SIGNALING_PATHWAY | 2.330385 | 0.6572601 | 0.0000000 | 0.0000000 | 0.8870750 | 103 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_MEMBRANE_INVAGINATION | 2.333606 | 0.6574626 | 0.0000000 | 0.0000000 | 0.8753251 | 100 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_B_CELL_MEDIATED_IMMUNITY | 2.200900 | 0.5948299 | 0.0000000 | 0.0000000 | 0.8753251 | 146 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_PHAGOCYTOSIS_RECOGNITION | 2.397788 | 0.7209858 | 0.0000000 | 0.0000001 | 0.8390889 | 64 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_CELL_RECOGNITION | 2.128607 | 0.5962921 | 0.0000000 | 0.0000022 | 0.7614608 | 108 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOBP_RIBONUCLEOPROTEIN_COMPLEX_BIOGENESIS | -1.470557 | -0.3363683 | 0.0006050 | 0.0916945 | 0.4772708 | 361 | MRPL1/EBNA1BP2/SRPK2/USP39/METTL18/… |
| GOBP_POSITIVE_REGULATION_OF_ORGANELLE_ORGANIZATION | -1.474498 | -0.3452834 | 0.0005451 | 0.0843482 | 0.4772708 | 311 | TNFSF10/CALCOCO2/WRAP73/SLC25A4/ANXA1/… |
| GOBP_ORGANONITROGEN_COMPOUND_BIOSYNTHETIC_PROCESS | -1.282478 | -0.2719349 | 0.0004705 | 0.0760287 | 0.4984931 | 1090 | RPL30/SVIP/METTL18/PIGB/RB1CC1/… |
| GOBP_CYTOPLASMIC_TRANSLATION | -1.859527 | -0.4822311 | 0.0000284 | 0.0055584 | 0.5756103 | 136 | RPL30/FMR1/RPS5/DHX36/RPL22/… |
| GOBP_CELLULAR_MACROMOLECULE_BIOSYNTHETIC_PROCESS | -1.369944 | -0.2937096 | 0.0000220 | 0.0045352 | 0.5756103 | 997 | RPL30/NASP/SVIP/METTL18/PIGB/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 1.875645 | 0.4546636 | 0.0000000 | 0.0000006 | 0.8266573 | 403 | TRBV18/TRAV27/HLA-DQB1/TRBV10-1/IGKV2-30/… |
| GOBP_RESPONSE_TO_BACTERIUM | 1.763266 | 0.4334760 | 0.0000002 | 0.0003109 | 0.6901325 | 327 | TRAV27/FOS/GNLY/TRAV25/IGHV3-49/… |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_PROCESS | 1.929180 | 0.5758266 | 0.0000281 | 0.0161169 | 0.5756103 | 69 | IFITM3/IFIT1/OAS2/TRIM11/CCL5/… |
| GOBP_RECEPTOR_SIGNALING_PATHWAY_VIA_STAT | 1.875341 | 0.5427315 | 0.0000541 | 0.0268474 | 0.5573322 | 82 | SOCS2/SOCS3/SOCS1/TNFRSF18/CCL5/… |
| GOBP_RESPONSE_TO_MOLECULE_OF_BACTERIAL_ORIGIN | 1.692554 | 0.4483018 | 0.0000993 | 0.0389238 | 0.5384341 | 164 | FOS/PTGER2/TNFAIP3/ARID5A/CCL5/… |
| GOBP_PLASMA_CELL_DIFFERENTIATION | 1.733595 | 0.9494068 | 0.0002125 | 0.0626707 | 0.5188481 | 5 | LGALS1/XBP1/IL10 |
| GOBP_NEGATIVE_REGULATION_OF_VIRAL_GENOME_REPLICATION | 1.931641 | 0.6115060 | 0.0002622 | 0.0650605 | 0.4984931 | 45 | IFITM3/IFIT1/OAS2/CCL5/MX1/… |
| GOBP_RESPONSE_TO_ALCOHOL | 1.753322 | 0.4782822 | 0.0002908 | 0.0658004 | 0.4984931 | 123 | CLDN5/FOS/SGK1/CDO1/PTGER2/… |
| GOBP_TYROSINE_PHOSPHORYLATION_OF_STAT_PROTEIN | 1.866332 | 0.6225493 | 0.0004476 | 0.0841860 | 0.4984931 | 36 | SOCS3/SOCS1/TNFRSF18/CCL5/IL6ST/… |
| GOBP_RESPONSE_TO_MINERALOCORTICOID | 1.884710 | 0.7786223 | 0.0006691 | 0.0976717 | 0.4772708 | 14 | FOS/SGK1/CYBA |
| GOBP_TRIPEPTIDE_TRANSMEMBRANE_TRANSPORT | -1.639683 | -0.9829291 | 0.0005507 | 0.0911007 | 0.4772708 | 3 | ABCC1 |
| GOBP_TRIPEPTIDE_TRANSPORT | -1.639683 | -0.9829291 | 0.0005507 | 0.0911007 | 0.4772708 | 3 | ABCC1 |
| GOBP_NCRNA_3_END_PROCESSING | -1.900619 | -0.6150942 | 0.0004283 | 0.0839063 | 0.4984931 | 36 | DKC1/INTS2/TRMT10C/EXOSC10/INTS14/… |
| GOBP_LYSOSOMAL_TRANSPORT | -1.813481 | -0.4889308 | 0.0002202 | 0.0626707 | 0.5188481 | 93 | TSG101/RAB7A/M6PR/BIN1/CLEC16A/… |
| GOBP_VACUOLAR_TRANSPORT | -1.767638 | -0.4497428 | 0.0000689 | 0.0302966 | 0.5384341 | 124 | TSG101/RAB7A/M6PR/BIN1/CLEC16A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_HUMORAL_IMMUNE_RESPONSE | 2.165119 | 0.5878952 | 0.0000000 | 0.0000137 | 0.7881868 | 127 | CXCL10/FCGR2B/C1QA/C1QB/CXCL9/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.221142 | 0.7768539 | 0.0000006 | 0.0006202 | 0.6594444 | 27 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/CTSD/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN_VIA_MHC_CLASS_II | 2.237628 | 0.8115468 | 0.0000007 | 0.0006842 | 0.6594444 | 23 | FCGR2B/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/… |
| GOBP_ANTIMICROBIAL_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_ANTIMICROBIAL_PEPTIDE | 2.180670 | 0.7663601 | 0.0000024 | 0.0011792 | 0.6272567 | 26 | CXCL10/CXCL9/S100A12/RNASE3/HMGN2/… |
| GOBP_MICROGLIAL_CELL_ACTIVATION | 2.166229 | 0.7468120 | 0.0000042 | 0.0018359 | 0.6105269 | 30 | CX3CR1/CST7/C1QA/LRRK2/FPR2/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_ANTIGEN | 2.132286 | 0.6986919 | 0.0000044 | 0.0018359 | 0.6105269 | 39 | FCGR2B/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/… |
| GOBP_ANTIMICROBIAL_HUMORAL_RESPONSE | 2.073288 | 0.6587770 | 0.0000061 | 0.0022711 | 0.6105269 | 47 | CXCL10/CXCL9/S100A9/S100A12/RNASE3/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_ANTIGEN | 2.058996 | 0.6466273 | 0.0000100 | 0.0032469 | 0.5933255 | 50 | FCGR2B/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| GOBP_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_EXOGENOUS_PEPTIDE_ANTIGEN | 2.129063 | 0.7265962 | 0.0000121 | 0.0037956 | 0.5933255 | 32 | FCGR2B/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/… |
| GOBP_DEFENSE_RESPONSE_TO_FUNGUS | 2.085321 | 0.7405266 | 0.0000393 | 0.0086822 | 0.5573322 | 25 | CX3CR1/S100A8/S100A9/S100A12/USP15/… |
| GOBP_TELOMERE_MAINTENANCE_VIA_RECOMBINATION | -1.856567 | -0.7685224 | 0.0013664 | 0.0737555 | 0.4550599 | 13 | BRCA2/SMC6/SMC5/RAD50/ERCC4 |
| GOBP_SPHINGOMYELIN_CATABOLIC_PROCESS | -1.711820 | -0.9186817 | 0.0010558 | 0.0646364 | 0.4550599 | 5 | PRKCD/SMPDL3A |
| GOBP_NEGATIVE_REGULATION_OF_PROTEIN_LOCALIZATION_TO_NUCLEUS | -1.883167 | -0.7081220 | 0.0007839 | 0.0539573 | 0.4772708 | 20 | LZTS2/ILRUN/NF1/FBXO4/LATS2/… |
| GOBP_RNA_PHOSPHODIESTER_BOND_HYDROLYSIS_ENDONUCLEOLYTIC | -1.794260 | -0.5300054 | 0.0005956 | 0.0446348 | 0.4772708 | 61 | POP4/DBR1/POP5/RPP30/LACTB2/… |
| GOBP_ORGANELLE_TRANSPORT_ALONG_MICROTUBULE | -1.908485 | -0.5737035 | 0.0001572 | 0.0206867 | 0.5188481 | 54 | FBXW11/BLOC1S2/KIF5B/BLOC1S1/RHOT1/… |
| GOBP_VESICLE_TRANSPORT_ALONG_MICROTUBULE | -2.019859 | -0.6825127 | 0.0001058 | 0.0158803 | 0.5384341 | 31 | FBXW11/BLOC1S2/KIF5B/BLOC1S1/BLOC1S3/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_GLIAL_CELL_DEVELOPMENT | 2.456649 | 0.7731897 | 0.0000002 | 0.0013220 | 0.6901325 | 27 | S100A8/S100A9/CLU/PHGDH/ILK/… |
| GOBP_DEFENSE_RESPONSE_TO_FUNGUS | 2.395708 | 0.8599855 | 0.0000007 | 0.0024346 | 0.6594444 | 16 | S100A8/S100A9/S100A12/SPON2 |
| GOBP_PEPTIDE_ANTIGEN_ASSEMBLY_WITH_MHC_CLASS_II_PROTEIN_COMPLEX | 2.305183 | 0.8578644 | 0.0000032 | 0.0075097 | 0.6272567 | 14 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| GOBP_PROTEIN_NITROSYLATION | 2.080690 | 0.9497010 | 0.0000061 | 0.0105872 | 0.6105269 | 7 | S100A8/S100A9/ATP2B4 |
| GOBP_POSITIVE_REGULATION_OF_NF_KAPPAB_TRANSCRIPTION_FACTOR_ACTIVITY | 2.054708 | 0.5639239 | 0.0000115 | 0.0160569 | 0.5933255 | 53 | S100A8/S100A9/S100A12/CLU/TRIM14 |
| GOBP_GLIAL_CELL_DIFFERENTIATION | 2.126475 | 0.5973731 | 0.0000190 | 0.0217074 | 0.5756103 | 48 | S100A8/S100A9/CLU/PHGDH/ILK/… |
| GOBP_ASTROCYTE_DEVELOPMENT | 2.105190 | 0.8857794 | 0.0000515 | 0.0359789 | 0.5573322 | 9 | S100A8/S100A9/IFNGR1/GRN |
| GOBP_GLIOGENESIS | 1.953508 | 0.5294465 | 0.0000452 | 0.0359789 | 0.5573322 | 58 | S100A8/S100A9/CLU/PHGDH/ILK/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE | 1.934492 | 0.5344818 | 0.0001599 | 0.0863855 | 0.5188481 | 50 | S100A9/S100A12/CLU/POU2F2/HLA-DRB1/… |
| GOBP_DEFENSE_RESPONSE_TO_BACTERIUM | 1.853923 | 0.4865660 | 0.0001606 | 0.0863855 | 0.5188481 | 68 | S100A8/S100A9/MPEG1/S100A12/RNASE2/… |
| GOBP_REGULATION_OF_NON_CANONICAL_WNT_SIGNALING_PATHWAY | -1.563181 | -0.9859825 | 0.0000807 | 0.0512741 | 0.5384341 | 3 | DAB2/RNF213 |
| GOBP_PROTEIN_DENEDDYLATION | -1.800583 | -0.8718514 | 0.0000467 | 0.0359789 | 0.5573322 | 9 | TOR1A/COPS7A/COPS2/COPS3/COPS5 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOBP_ADAPTIVE_IMMUNE_RESPONSE | 2.048789 | 0.6588388 | 0.0000000 | 0.0000000 | 1.1778933 | 285 | TRBV24-1/IGKV1-9/JCHAIN/TRBV7-6/IGHA1/… |
| GOBP_IMMUNE_RESPONSE | 1.683739 | 0.5271378 | 0.0000000 | 0.0000000 | 1.1690700 | 844 | GNLY/TRBV24-1/IGKV1-9/GZMB/JCHAIN/… |
| GOBP_RESPONSE_TO_BACTERIUM | 1.744615 | 0.5633550 | 0.0000000 | 0.0000097 | 0.7614608 | 255 | GNLY/JCHAIN/IGHA1/IGKC/FOS/… |
| GOBP_DEFENSE_RESPONSE_TO_BACTERIUM | 1.915474 | 0.6553466 | 0.0000014 | 0.0014097 | 0.6435518 | 97 | GNLY/JCHAIN/IGHA1/IGKC/SPON2/… |
| GOBP_INNATE_IMMUNE_RESPONSE | 1.515235 | 0.4824900 | 0.0000010 | 0.0014097 | 0.6435518 | 397 | GZMB/JCHAIN/IGHA1/IGKC/IFITM3/… |
| GOBP_HUMORAL_IMMUNE_RESPONSE | 1.948609 | 0.6758531 | 0.0000048 | 0.0038839 | 0.6105269 | 81 | GNLY/JCHAIN/IGHA1/IGKC/IGLL5/… |
| GOBP_ANTIMICROBIAL_HUMORAL_RESPONSE | 2.032802 | 0.8076655 | 0.0000449 | 0.0297954 | 0.5573322 | 29 | GNLY/JCHAIN/IGHA1/SPON2/IGHA2/… |
| GOBP_CELL_KILLING | 1.832861 | 0.6310662 | 0.0000511 | 0.0311168 | 0.5573322 | 90 | GNLY/GZMB/TYROBP/ARG1/CX3CR1/… |
| GOBP_COMPLEMENT_ACTIVATION | 1.966954 | 0.8112968 | 0.0001437 | 0.0807669 | 0.5188481 | 22 | IGHA1/IGKC/IGLL5/IGHA2/IGHM/… |
| GOBP_POSITIVE_REGULATION_OF_RESPIRATORY_BURST | 1.633790 | 0.9845274 | 0.0001799 | 0.0938862 | 0.5188481 | 4 | JCHAIN/IGHA1/IGHA2 |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. Positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild, and vice-versa.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_SINGLE_STRANDED_DNA_BINDING | 2.116148 | 0.6497663 | 0.0000000 | 0.0000606 | 0.7195128 | 111 | MCM4/MCM2/MCM7/CDC45/HMGB2/… |
| GOMF_PEPTIDE_ANTIGEN_BINDING | 2.047347 | 0.7822064 | 0.0000079 | 0.0044104 | 0.5933255 | 31 | TRBV7-9/TRBV28/TRAV8-4/TRAV29DV5/TRAV12-1/… |
| GOMF_MHC_PROTEIN_BINDING | 2.024207 | 0.7513247 | 0.0000267 | 0.0082618 | 0.5756103 | 37 | TRBV7-9/CD8B/TRAV8-4/TRAV29DV5/TUBB4B/… |
| GOMF_SINGLE_STRANDED_DNA_HELICASE_ACTIVITY | 1.977464 | 0.8021799 | 0.0000445 | 0.0082618 | 0.5573322 | 23 | MCM4/MCM2/MCM7/RAD51/MCM5/… |
| GOMF_ANTIGEN_BINDING | 1.843091 | 0.5625983 | 0.0000295 | 0.0082618 | 0.5756103 | 118 | TRBV7-9/TRBV28/TRAV8-4/TRAV29DV5/TRAV12-1/… |
| GOMF_DNA_HELICASE_ACTIVITY | 1.936528 | 0.6414983 | 0.0000732 | 0.0110135 | 0.5384341 | 67 | MCM4/MCM2/MCM7/RAD51/MCM5/… |
| GOMF_DNA_REPLICATION_ORIGIN_BINDING | 1.974101 | 0.8630608 | 0.0001478 | 0.0189926 | 0.5188481 | 15 | MCM2/CDC45/MCM10/MCM5/CDC6/… |
| GOMF_CARBOXYLIC_ACID_BINDING | 1.842536 | 0.6207618 | 0.0001707 | 0.0203563 | 0.5188481 | 64 | PTGDS/TYMS/DHFR/FABP5/SHMT2/… |
| GOMF_ISOPRENOID_BINDING | 1.836143 | 0.9050190 | 0.0005299 | 0.0465767 | 0.4772708 | 9 | PTGDS/FABP5 |
| GOMF_DNA_SECONDARY_STRUCTURE_BINDING | 1.824337 | 0.6877532 | 0.0011219 | 0.0697218 | 0.4550599 | 33 | CLSPN/HMGB2/RAD51AP1/RBBP8/HMGB3/… |
| GOMF_RNA_POLYMERASE_II_C_TERMINAL_DOMAIN_PHOSPHOSERINE_BINDING | -1.656665 | -0.9181357 | 0.0011874 | 0.0697218 | 0.4550599 | 5 | PCIF1/RTF1 |
| GOMF_RNA_POLYMERASE_II_C_TERMINAL_DOMAIN_BINDING | -1.889651 | -0.9095411 | 0.0000791 | 0.0110135 | 0.5384341 | 8 | PCIF1/SCAF1/RTF1 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_IMMUNOGLOBULIN_RECEPTOR_BINDING | 2.515320 | 0.7591970 | 0.0000000 | 0.0000000 | 0.9101197 | 61 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOMF_ANTIGEN_BINDING | 2.185833 | 0.5928714 | 0.0000000 | 0.0000006 | 0.8012156 | 125 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOMF_OXIDOREDUCTASE_ACTIVITY | 1.497780 | 0.3594296 | 0.0001513 | 0.0628451 | 0.5188481 | 412 | MT-ND2/MTHFD2/SRD5A1/CHCHD4/MT-CYB/… |
| GOMF_STRUCTURAL_CONSTITUENT_OF_RIBOSOME | -2.003018 | -0.5045388 | 0.0000067 | 0.0037351 | 0.6105269 | 150 | RPL30/MRPL1/MRPS31/MRPL46/RPS5/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_CYTOKINE_ACTIVITY | 2.001337 | 0.6480582 | 0.0000289 | 0.0240666 | 0.5756103 | 43 | CMTM8/CD40LG/CCL5/CXCL13/MIF/… |
| GOMF_ANTIGEN_BINDING | 1.855698 | 0.5098333 | 0.0000216 | 0.0240666 | 0.5756103 | 111 | HLA-DQB1/IGKV2-30/IGHV3-49/LAG3/IGLV2-23/… |
| GOMF_SIGNALING_RECEPTOR_REGULATOR_ACTIVITY | 1.794035 | 0.5069550 | 0.0002520 | 0.0666386 | 0.4984931 | 95 | CMTM8/CD40LG/AREG/CCL5/EDA/… |
| GOMF_NUCLEAR_RECEPTOR_COACTIVATOR_ACTIVITY | -1.870840 | -0.5582674 | 0.0004124 | 0.0858290 | 0.4984931 | 46 | BRD8/TSG101/NCOA1/ETS1/PKN1 |
| GOMF_GLUTATHIONE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY | -1.620461 | -0.9829291 | 0.0002802 | 0.0666386 | 0.4984931 | 3 | ABCC1 |
| GOMF_TRIPEPTIDE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY | -1.620461 | -0.9829291 | 0.0002802 | 0.0666386 | 0.4984931 | 3 | ABCC1 |
| GOMF_PEPTIDE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY | -1.888030 | -0.9024118 | 0.0001746 | 0.0666386 | 0.5188481 | 7 | ABCC1/TAP1/SLC15A4 |
| GOMF_INTRAMOLECULAR_TRANSFERASE_ACTIVITY | -1.958486 | -0.7566921 | 0.0002373 | 0.0666386 | 0.5188481 | 17 | DKC1/LSS/TRUB2/TRUB1/PGM2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_PEPTIDE_BINDING | 1.860633 | 0.4803554 | 0.0000038 | 0.0032306 | 0.6105269 | 167 | FCGR2B/C1QA/HLA-DRB1/HLA-DPA1/HLA-G/… |
| GOMF_MHC_CLASS_II_PROTEIN_COMPLEX_BINDING | 2.161612 | 0.7860689 | 0.0000106 | 0.0044716 | 0.5933255 | 23 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/HLA-DQA1/… |
| GOMF_MHC_PROTEIN_COMPLEX_BINDING | 2.094137 | 0.7297481 | 0.0000199 | 0.0062596 | 0.5756103 | 29 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/HLA-DQA1/… |
| GOMF_IMMUNE_RECEPTOR_ACTIVITY | 1.940081 | 0.5743769 | 0.0000496 | 0.0107538 | 0.5573322 | 66 | CX3CR1/FCGR3A/HLA-DRB1/HLA-DPA1/HLA-DQB1/… |
| GOMF_CHEMOKINE_RECEPTOR_BINDING | 2.044113 | 0.7540300 | 0.0000934 | 0.0174540 | 0.5384341 | 22 | CXCL10/CX3CR1/CXCL9/CCL3L3/CCL4/… |
| GOMF_LONG_CHAIN_FATTY_ACID_BINDING | 1.893833 | 0.8795587 | 0.0001511 | 0.0253955 | 0.5188481 | 9 | S100A8/ALOX5AP/S100A9/STX3 |
| GOMF_MONOCARBOXYLIC_ACID_BINDING | 1.958639 | 0.6566511 | 0.0002021 | 0.0308884 | 0.5188481 | 34 | S100A8/ALOX5AP/S100A9/RBP7/GSTM2/… |
| GOMF_FATTY_ACID_BINDING | 1.988337 | 0.7334556 | 0.0002265 | 0.0317318 | 0.5188481 | 22 | S100A8/ALOX5AP/S100A9/RBP7/GSTM2/… |
| GOMF_TOLL_LIKE_RECEPTOR_BINDING | 1.880313 | 0.7989108 | 0.0007662 | 0.0677905 | 0.4772708 | 12 | S100A8/S100A9/TLR1 |
| GOMF_CHEMOKINE_ACTIVITY | 1.840349 | 0.7633174 | 0.0013191 | 0.0852833 | 0.4550599 | 14 | CXCL10/CXCL9/CCL4/CXCL11/CXCL8/… |
| GOMF_MUSCLE_ALPHA_ACTININ_BINDING | -1.694952 | -0.8518713 | 0.0018847 | 0.0990080 | 0.4550599 | 7 | PDLIM2/PKD2L1/PDLIM5 |
| GOMF_GTPASE_INHIBITOR_ACTIVITY | -1.739581 | -0.7999095 | 0.0012166 | 0.0830559 | 0.4550599 | 10 | IQGAP2/RHOH/IPO5/IQGAP1 |
| GOMF_ENDORIBONUCLEASE_ACTIVITY | -1.745404 | -0.5422953 | 0.0012352 | 0.0830559 | 0.4550599 | 50 | POP4/DBR1/POP5/RPP30/LACTB2/… |
| GOMF_STRUCTURAL_MOLECULE_ACTIVITY_CONFERRING_ELASTICITY | -1.710554 | -0.9407178 | 0.0004029 | 0.0403182 | 0.4984931 | 5 | EMILIN2/LAMC1 |
| GOMF_GAMMA_TUBULIN_BINDING | -1.916167 | -0.6931893 | 0.0003459 | 0.0403182 | 0.4984931 | 24 | LYN/BRCA2/BLOC1S2/WASHC1/TUBGCP6/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_IMMUNE_RECEPTOR_ACTIVITY | 2.371653 | 0.6794128 | 0.0000004 | 0.0005664 | 0.6749629 | 41 | HLA-DQA1/IFNGR2/FCGR1A/HLA-DRB1/HLA-DPA1/… |
| GOMF_MHC_CLASS_II_PROTEIN_COMPLEX_BINDING | 2.283245 | 0.7498025 | 0.0000181 | 0.0134317 | 0.5756103 | 23 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/CD4/… |
| GOMF_MHC_CLASS_II_RECEPTOR_ACTIVITY | 2.255942 | 0.8883127 | 0.0000690 | 0.0290381 | 0.5384341 | 9 | HLA-DQA1/HLA-DRB1/HLA-DPA1/HLA-DQB1/HLA-DRA/… |
| GOMF_RAGE_RECEPTOR_BINDING | 1.935802 | 0.9524268 | 0.0000782 | 0.0290381 | 0.5384341 | 5 | S100A8/S100A12/HMGB2 |
| GOMF_MHC_PROTEIN_COMPLEX_BINDING | 2.180967 | 0.6837297 | 0.0002132 | 0.0527555 | 0.5188481 | 26 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/CD4/… |
| GOMF_ENDORIBONUCLEASE_ACTIVITY_PRODUCING_3_PHOSPHOMONOESTERS | 1.723298 | 0.9815504 | 0.0002046 | 0.0527555 | 0.5188481 | 3 | RNASE2/RNASE1/TSEN34 |
| GOMF_CARBOXYLIC_ACID_BINDING | 2.066286 | 0.6265775 | 0.0002909 | 0.0617214 | 0.4984931 | 30 | S100A8/S100A9/PTGDS/SLC19A1/SCP2/… |
| GOMF_SEQUENCE_SPECIFIC_MRNA_BINDING | -1.674101 | -0.9327355 | 0.0003886 | 0.0721290 | 0.4984931 | 5 | DHX9/ETF1 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOMF_ANTIGEN_BINDING | 2.206274 | 0.7773854 | 0.0e+00 | 0.0000062 | 0.7614608 | 65 | JCHAIN/IGHA1/IGKC/TRAV8-4/TRBV12-3/… |
| GOMF_IMMUNOGLOBULIN_RECEPTOR_BINDING | 2.061595 | 0.8908119 | 3.8e-06 | 0.0030738 | 0.6105269 | 17 | JCHAIN/IGHA1/IGKC/IGLL5/IGHA2/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild; a negative NES indicates the pathway is enriched in Covid_Mild relative to Covid_Critical.
If a cluster is missing, it indicates that no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_CHROMOSOMAL_REGION | 2.153562 | 0.6019238 | 0.0000000 | 0.0000000 | 1.0073180 | 294 | CENPM/MCM4/CENPN/MCM2/MCM7/… |
| GOCC_T_CELL_RECEPTOR_COMPLEX | 2.463714 | 0.8062666 | 0.0000000 | 0.0000000 | 0.9653278 | 73 | TRBV7-9/TRBV28/CD8B/TRAV8-4/TRBV30/… |
| GOCC_PLASMA_MEMBRANE_SIGNALING_RECEPTOR_COMPLEX | 2.345398 | 0.7105977 | 0.0000000 | 0.0000000 | 0.9436322 | 132 | TRBV7-9/TRBV28/CD8B/TRAV8-4/TRBV30/… |
| GOCC_RECEPTOR_COMPLEX | 2.190003 | 0.6324329 | 0.0000000 | 0.0000000 | 0.8986712 | 201 | TRBV7-9/TRBV28/CD8B/TRAV8-4/TRBV30/… |
| GOCC_PLASMA_MEMBRANE_PROTEIN_COMPLEX | 2.042605 | 0.5792467 | 0.0000000 | 0.0000000 | 0.8634154 | 266 | TRBV7-9/TRBV28/CD8B/TRAV8-4/TRBV30/… |
| GOCC_NUCLEAR_CHROMOSOME | 2.121877 | 0.6186505 | 0.0000000 | 0.0000000 | 0.8140358 | 180 | H3C2/GINS2/MCM4/MCM2/MCM7/… |
| GOCC_PROTEIN_DNA_COMPLEX | 2.184620 | 0.6481419 | 0.0000000 | 0.0000000 | 0.8140358 | 150 | H3C2/GINS2/MCM2/CDC45/H2AX/… |
| GOCC_CONDENSED_CHROMOSOME | 2.074924 | 0.5995676 | 0.0000000 | 0.0000000 | 0.8140358 | 199 | CENPM/CENPN/MKI67/CENPU/ZWINT/… |
| GOCC_DNA_PACKAGING_COMPLEX | 2.242180 | 0.7117612 | 0.0000000 | 0.0000001 | 0.7881868 | 92 | H3C2/NCAPH/SMC4/H2BC11/H2AZ1/… |
| GOCC_CONDENSED_CHROMOSOME_CENTROMERIC_REGION | 2.000353 | 0.5969363 | 0.0000003 | 0.0000236 | 0.6749629 | 145 | CENPM/CENPN/CENPU/ZWINT/CENPW/… |
| GOCC_DENDRITE_MEMBRANE | -1.673108 | -0.9045261 | 0.0045097 | 0.0987124 | 0.4070179 | 5 | ATP2B1/AKAP5 |
| GOCC_CHLORIDE_CHANNEL_COMPLEX | -1.725759 | -0.8015903 | 0.0031206 | 0.0749714 | 0.4317077 | 9 | OSTM1/TTYH2/CLCC1 |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -1.837411 | -0.5694257 | 0.0023245 | 0.0620873 | 0.4317077 | 50 | RPL6/RPL36A/RPL10A/RPL5/RPL36/… |
| GOCC_POLYSOMAL_RIBOSOME | -1.838438 | -0.6344751 | 0.0023322 | 0.0620873 | 0.4317077 | 29 | RPL6/RPL36A/RPL10A/RPL36/RPS23/… |
| GOCC_INTEGRAL_COMPONENT_OF_LYSOSOMAL_MEMBRANE | -1.657898 | -0.9489220 | 0.0013453 | 0.0456942 | 0.4550599 | 4 | SLC46A3 |
| GOCC_CYTOSOLIC_RIBOSOME | -1.697702 | -0.4786446 | 0.0011482 | 0.0403915 | 0.4550599 | 91 | RPL6/RPL36A/RPL10A/RPL5/RPL36/… |
| GOCC_RIBOSOME | -1.552780 | -0.3895046 | 0.0008887 | 0.0336693 | 0.4772708 | 194 | MPV17L2/MRPL58/RPL6/MRPL42/RPL36A/… |
| GOCC_POLYSOME | -1.865676 | -0.5621103 | 0.0008361 | 0.0329405 | 0.4772708 | 58 | RPL6/DIS3L2/RPL36A/RPL10A/RPL36/… |
| GOCC_INTRINSIC_COMPONENT_OF_VACUOLAR_MEMBRANE | -1.838011 | -0.8537298 | 0.0005833 | 0.0249817 | 0.4772708 | 9 | SLC46A3 |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_IMMUNOGLOBULIN_COMPLEX | 2.728632 | 0.7431779 | 0.0000000 | 0.0000000 | 1.2378967 | 122 | IGHV5-10-1/IGHV3-49/IGKV6-21/IGLV4-3/IGLV5-37/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX_CIRCULATING | 2.492987 | 0.7658562 | 0.0000000 | 0.0000000 | 0.9325952 | 60 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 1.770614 | 0.4556816 | 0.0000053 | 0.0010405 | 0.6105269 | 204 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOCC_CELL_SURFACE | 1.637783 | 0.4000528 | 0.0000051 | 0.0010405 | 0.6105269 | 347 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOCC_SIDE_OF_MEMBRANE | 1.517654 | 0.3731209 | 0.0001565 | 0.0140818 | 0.5188481 | 329 | IGHV5-10-1/IGHV3-49/IGHV3-66/IGHV1-69-2/IGHV6-1/… |
| GOCC_RIBOSOME | -1.695147 | -0.4193540 | 0.0001520 | 0.0140818 | 0.5188481 | 190 | RPL30/MRPL1/MRPL46/MRPL15/RPS5/… |
| GOCC_RIBONUCLEOPROTEIN_COMPLEX | -1.477137 | -0.3269321 | 0.0000212 | 0.0023274 | 0.5756103 | 594 | RPL30/MRPL1/EFTUD2/EBNA1BP2/USP39/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -2.140987 | -0.6615629 | 0.0000129 | 0.0016015 | 0.5933255 | 49 | MRPL1/RPL22/RPL17/RPL13/RPL29/… |
| GOCC_RIBOSOMAL_SUBUNIT | -1.875132 | -0.4750649 | 0.0000103 | 0.0014601 | 0.5933255 | 160 | RPL30/MRPL1/MRPL46/MRPL15/RPS5/… |
| GOCC_LARGE_RIBOSOMAL_SUBUNIT | -2.088079 | -0.5647408 | 0.0000098 | 0.0014601 | 0.5933255 | 103 | RPL30/MRPL1/MRPL46/MRPL15/RPL22/… |
| GOCC_CYTOSOLIC_RIBOSOME | -2.118784 | -0.5902958 | 0.0000023 | 0.0007649 | 0.6272567 | 89 | RPL30/MRPL1/RPS5/RPL22/RPL35/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_T_CELL_RECEPTOR_COMPLEX | 1.903663 | 0.5366106 | 0.0000400 | 0.0195423 | 0.5573322 | 94 | TRBV18/TRBV10-1/TRAV25/TRBV11-1/TRAV5/… |
| GOCC_PLASMA_MEMBRANE_SIGNALING_RECEPTOR_COMPLEX | 1.767999 | 0.4695008 | 0.0000340 | 0.0195423 | 0.5573322 | 150 | TRBV18/TRAV27/TRBV10-1/TRAV25/TRBV11-1/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX | 1.885940 | 0.5464453 | 0.0001772 | 0.0577721 | 0.5188481 | 77 | IGKV2-30/IGHV3-49/IGLV2-23/IGLV4-69/IGKV1-16/… |
| GOCC_RECEPTOR_COMPLEX | 1.606175 | 0.4050069 | 0.0002639 | 0.0645330 | 0.4984931 | 218 | TRBV18/TRAV27/TRBV10-1/TRAV25/TRBV11-1/… |
| GOCC_PLASMA_MEMBRANE_PROTEIN_COMPLEX | 1.555189 | 0.3821801 | 0.0004614 | 0.0902469 | 0.4984931 | 279 | TRBV18/TRAV27/HLA-DQB1/TRBV10-1/TRAV25/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_SIDE_OF_MEMBRANE | 1.846697 | 0.4466567 | 0.0000000 | 0.0000207 | 0.7337620 | 308 | CXCL10/CX3CR1/FCGR2B/FCGR3A/CXCL9/… |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 1.909978 | 0.4893222 | 0.0000002 | 0.0000913 | 0.6901325 | 192 | CXCL10/CX3CR1/ITGA4/FCGR2B/CD86/… |
| GOCC_MHC_CLASS_II_PROTEIN_COMPLEX | 2.007781 | 0.8348291 | 0.0000609 | 0.0100325 | 0.5573322 | 14 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/HLA-DQA1/… |
| GOCC_ENDOPLASMIC_RETICULUM_CHAPERONE_COMPLEX | 1.948849 | 0.8805825 | 0.0000531 | 0.0100325 | 0.5573322 | 10 | SDF2L1/DNAJB11/P4HB/DNAJC10/PDIA6/… |
| GOCC_COLLAGEN_CONTAINING_EXTRACELLULAR_MATRIX | 1.822406 | 0.5069833 | 0.0000814 | 0.0100595 | 0.5384341 | 98 | S100A8/C1QA/C1QB/S100A9/C1QC/… |
| GOCC_MHC_PROTEIN_COMPLEX | 1.930571 | 0.7173907 | 0.0003378 | 0.0303714 | 0.4984931 | 22 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/HLA-DQB1/… |
| GOCC_ENDOCYTIC_VESICLE_LUMEN | 1.895734 | 0.8172113 | 0.0003854 | 0.0317595 | 0.4984931 | 12 | HP/CALR/APOA1/HYOU1/CTSL/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE_MEMBRANE | 1.868321 | 0.5999068 | 0.0004537 | 0.0345193 | 0.4984931 | 41 | HLA-DRB1/CD9/HLA-DPA1/HLA-DQB1/HLA-DQA1/… |
| GOCC_PROTEIN_LIPID_COMPLEX | 1.926354 | 0.7810465 | 0.0005202 | 0.0367516 | 0.4772708 | 16 | DBI/APOA1/MSR1/BIN1/APOO/… |
| GOCC_LUMENAL_SIDE_OF_ENDOPLASMIC_RETICULUM_MEMBRANE | 1.812523 | 0.6635407 | 0.0015532 | 0.0808476 | 0.4550599 | 25 | HLA-DRB1/HLA-DPA1/HLA-G/HLA-DQB1/HLA-DQA1/… |
| GOCC_CYTOSOLIC_RIBOSOME | -1.737803 | -0.4781041 | 0.0007586 | 0.0494222 | 0.4772708 | 89 | RPL23/RPL18A/RPL29/RPL18/RPL36AL/… |
| GOCC_CYTOSOLIC_LARGE_RIBOSOMAL_SUBUNIT | -1.844486 | -0.5587320 | 0.0008101 | 0.0494222 | 0.4772708 | 50 | RPL23/RPL18A/RPL29/RPL18/RPL36AL/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_MHC_CLASS_II_PROTEIN_COMPLEX | 2.370310 | 0.8615979 | 0.0000035 | 0.0019067 | 0.6272567 | 15 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE_MEMBRANE | 2.341537 | 0.7377665 | 0.0000041 | 0.0019067 | 0.6105269 | 26 | HLA-DQA1/FCGR1A/HLA-DRB1/HLA-DPB1/HLA-DPA1/… |
| GOCC_CLATHRIN_COATED_ENDOCYTIC_VESICLE | 2.245486 | 0.6889069 | 0.0000300 | 0.0069595 | 0.5756103 | 30 | HLA-DQA1/CLTA/FCGR1A/HLA-DRB1/HLA-DPB1/… |
| GOCC_CLATHRIN_COATED_VESICLE_MEMBRANE | 2.007773 | 0.5721868 | 0.0002714 | 0.0493861 | 0.4984931 | 38 | HLA-DQA1/CLTA/FCGR1A/HLA-DRB1/HLA-DPB1/… |
| GOCC_MHC_PROTEIN_COMPLEX | 2.063584 | 0.6905143 | 0.0005325 | 0.0585630 | 0.4772708 | 20 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| GOCC_EXTERNAL_ENCAPSULATING_STRUCTURE | 1.756124 | 0.4522910 | 0.0006322 | 0.0585630 | 0.4772708 | 64 | S100A8/S100A9/CLU/VCAN/CTSH/… |
| GOCC_ANCHORED_COMPONENT_OF_PLASMA_MEMBRANE | 1.704845 | 0.9721144 | 0.0008103 | 0.0626606 | 0.4772708 | 3 | CD14/CD2 |
| GOCC_CLATHRIN_COATED_VESICLE | 1.691293 | 0.4355936 | 0.0015421 | 0.0954059 | 0.4550599 | 64 | HLA-DQA1/CLTA/FCGR1A/HLA-DRB1/HLA-DPB1/… |
| GOCC_INTRACELLULAR_PROTEIN_CONTAINING_COMPLEX | -1.394951 | -0.4214633 | 0.0010681 | 0.0729824 | 0.4550599 | 299 | TMEM183A/UBE2D2/PCGF5/PSMD4/PAXX/… |
| GOCC_EXTRINSIC_COMPONENT_OF_ENDOPLASMIC_RETICULUM_MEMBRANE | -1.569832 | -0.9305318 | 0.0011010 | 0.0729824 | 0.4550599 | 4 | TOR1A/PML/CCDC115 |
| GOCC_COP9_SIGNALOSOME | -1.719149 | -0.7984972 | 0.0006942 | 0.0585630 | 0.4772708 | 11 | COPS9/COPS7A/COPS2/COPS3/COPS5 |
| GOCC_U5_SNRNP | -1.776350 | -0.7818248 | 0.0003193 | 0.0493861 | 0.4984931 | 14 | SNRPD1/SNRNP40/SNRPG/SNRPN/SNRPD3 |
| GOCC_CATALYTIC_COMPLEX | -1.373552 | -0.4039981 | 0.0000211 | 0.0065218 | 0.5756103 | 659 | TMEM183A/SNRPD1/UBE2D2/PCGF5/SNRNP40/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| GOCC_T_CELL_RECEPTOR_COMPLEX | 2.428293 | 0.8660192 | 0.00e+00 | 0.0000000 | 0.9759947 | 55 | TRBV24-1/TRBV7-6/TRBV7-2/TRAV3/TRAV5/… |
| GOCC_PLASMA_MEMBRANE_PROTEIN_COMPLEX | 2.024855 | 0.6560111 | 0.00e+00 | 0.0000000 | 0.9759947 | 205 | TRBV24-1/TRBV7-6/TRBV7-2/TRAV3/TRAV5/… |
| GOCC_PLASMA_MEMBRANE_SIGNALING_RECEPTOR_COMPLEX | 2.224667 | 0.7555429 | 0.00e+00 | 0.0000000 | 0.9325952 | 97 | TRBV24-1/TRBV7-6/TRBV7-2/TRAV3/TRAV20/… |
| GOCC_RECEPTOR_COMPLEX | 2.140080 | 0.7094074 | 0.00e+00 | 0.0000000 | 0.9325952 | 135 | TRBV24-1/TRBV7-6/TRBV7-2/TRAV3/TRAV5/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX | 2.270549 | 0.8803607 | 0.00e+00 | 0.0000019 | 0.7477397 | 29 | IGKV1-9/JCHAIN/IGHA1/IGKC/IGKV1-6/… |
| GOCC_IMMUNOGLOBULIN_COMPLEX_CIRCULATING | 2.116352 | 0.9200841 | 2.00e-07 | 0.0000295 | 0.6901325 | 15 | JCHAIN/IGHA1/IGKC/IGLL5/IGHA2/… |
| GOCC_IGA_IMMUNOGLOBULIN_COMPLEX | 1.673127 | 0.9869494 | 5.00e-06 | 0.0006066 | 0.6105269 | 4 | JCHAIN/IGHA1/IGHA2/IGKV3-20 |
| GOCC_EXTERNAL_SIDE_OF_PLASMA_MEMBRANE | 1.764678 | 0.5820632 | 5.70e-06 | 0.0006079 | 0.6105269 | 147 | IGHA1/IGKC/CX3CR1/FCRL6/TRGV5/… |
| GOCC_BLOOD_MICROPARTICLE | 2.035826 | 0.7727473 | 1.06e-05 | 0.0010199 | 0.5933255 | 34 | JCHAIN/IGHA1/IGKC/IGHA2/IGKV3-20/… |
| GOCC_CELL_SURFACE | 1.546065 | 0.4966569 | 3.60e-05 | 0.0031596 | 0.5573322 | 259 | IGHA1/IGKC/TYROBP/CX3CR1/FCRL6/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild; a negative NES indicates the pathway is enriched in Covid_Mild relative to Covid_Critical.
If a cluster is missing, it indicates that no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_CELL_CYCLE | 2.336511 | 0.6167624 | 0.0000000 | 0.0000000 | 1.5092628 | 606 | TYMS/H3C2/GINS2/CENPM/MCM4/… |
| REACTOME_CELL_CYCLE_MITOTIC | 2.416861 | 0.6461838 | 0.0000000 | 0.0000000 | 1.4675240 | 476 | TYMS/H3C2/GINS2/CENPM/MCM4/… |
| REACTOME_DNA_REPLICATION | 2.426995 | 0.7126979 | 0.0000000 | 0.0000000 | 1.0864405 | 165 | H3C2/GINS2/MCM4/MCM2/MCM7/… |
| REACTOME_CELL_CYCLE_CHECKPOINTS | 2.321792 | 0.6551597 | 0.0000000 | 0.0000000 | 1.0864405 | 239 | CENPM/MCM4/CENPN/MCM2/CLSPN/… |
| REACTOME_DNA_REPLICATION_PRE_INITIATION | 2.480731 | 0.7554312 | 0.0000000 | 0.0000000 | 1.0574636 | 125 | H3C2/MCM4/MCM2/MCM7/CDC45/… |
| REACTOME_MITOTIC_G1_PHASE_AND_G1_S_TRANSITION | 2.452254 | 0.7503150 | 0.0000000 | 0.0000000 | 1.0574636 | 122 | TYMS/MCM4/TK1/RRM2/MCM2/… |
| REACTOME_RESOLUTION_OF_SISTER_CHROMATID_COHESION | 2.257934 | 0.6973454 | 0.0000000 | 0.0000000 | 0.8513391 | 110 | CENPM/CENPN/CENPU/ZWINT/CDC20/… |
| REACTOME_ACTIVATION_OF_ATR_IN_RESPONSE_TO_REPLICATION_STRESS | 2.305606 | 0.8866198 | 0.0000000 | 0.0000000 | 0.8140358 | 29 | MCM4/MCM2/CLSPN/MCM7/CDC45/… |
| REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE | 2.278048 | 0.7832954 | 0.0000000 | 0.0000000 | 0.8140358 | 56 | CENPM/CENPN/CENPU/CENPW/H2AX/… |
| REACTOME_MEIOTIC_RECOMBINATION | 2.294077 | 0.7767556 | 0.0000000 | 0.0000001 | 0.8012156 | 63 | H3C2/RAD51/H2AX/RBBP8/H2AC14/… |
| REACTOME_ROS_AND_RNS_PRODUCTION_IN_PHAGOCYTES | -1.625722 | -0.5977521 | 0.0080684 | 0.0851781 | 0.3807304 | 25 | ATP6V1E2/NCF4/ATP6V0C/ATP6V1E1/NOS3/… |
| REACTOME_SUMOYLATION_OF_SUMOYLATION_PROTEINS | -1.639650 | -0.5492923 | 0.0060783 | 0.0673323 | 0.4070179 | 35 | NUP188/NUP88/NUP50/NUP35/NUP205/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -1.539416 | -0.4243047 | 0.0047827 | 0.0549138 | 0.4070179 | 99 | RPL6/RPL36A/RPL10A/RPL5/RPL36/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | -1.537413 | -0.4301516 | 0.0043859 | 0.0518722 | 0.4070179 | 94 | RPL6/RPL36A/RPL10A/RPL5/RPL36/… |
| REACTOME_BETA_OXIDATION_OF_VERY_LONG_CHAIN_FATTY_ACIDS | -1.726972 | -0.7958374 | 0.0027579 | 0.0349849 | 0.4317077 | 10 | ECI2/DECR2/EHHADH/ACOX1 |
| REACTOME_ROLE_OF_ABL_IN_ROBO_SLIT_SIGNALING | -1.612813 | -0.9462357 | 0.0021098 | 0.0286094 | 0.4317077 | 4 | CAP1/CLASP2 |
| REACTOME_SLC_TRANSPORTER_DISORDERS | -1.823149 | -0.5554837 | 0.0011372 | 0.0165625 | 0.4550599 | 53 | SLC9A6/NUP188/NUP88/SLC4A4/SLC22A18/… |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -1.777327 | -0.5042382 | 0.0004321 | 0.0073884 | 0.4984931 | 86 | RPL6/RPL36A/RPL10A/EEF1G/RPL5/… |
| REACTOME_RRNA_PROCESSING | -1.634518 | -0.4149927 | 0.0001770 | 0.0033145 | 0.5188481 | 191 | RPL6/TBL3/ISG20L2/RPL36A/RPL10A/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -1.790883 | -0.4936929 | 0.0001126 | 0.0022410 | 0.5384341 | 107 | SRP9/RPL6/TRAM1/RPL36A/RPL10A/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_COMPLEMENT_CASCADE | 2.009910 | 0.6029164 | 0.0000050 | 0.0025781 | 0.6105269 | 66 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/CFD/… |
| REACTOME_ANTIGEN_ACTIVATES_B_CELL_RECEPTOR_BCR_LEADING_TO_GENERATION_OF_SECOND_MESSENGERS | 1.975443 | 0.5805936 | 0.0000048 | 0.0025781 | 0.6105269 | 78 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_CD22_MEDIATED_BCR_REGULATION | 1.979083 | 0.6131349 | 0.0000169 | 0.0030933 | 0.5756103 | 57 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBILIZATION | 1.973968 | 0.6000759 | 0.0000171 | 0.0030933 | 0.5756103 | 62 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_FCERI_MEDIATED_CA_2_MOBILIZATION | 1.964366 | 0.5798315 | 0.0000106 | 0.0030933 | 0.5933255 | 76 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_FCERI_MEDIATED_MAPK_ACTIVATION | 1.940688 | 0.5775116 | 0.0000129 | 0.0030933 | 0.5933255 | 72 | IGHV1-46/IGKV1D-16/JUN/IGLV2-23/IGKV3D-20/… |
| REACTOME_ROLE_OF_PHOSPHOLIPIDS_IN_PHAGOCYTOSIS | 1.944950 | 0.5805449 | 0.0000230 | 0.0032541 | 0.5756103 | 71 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT | 1.954073 | 0.6053867 | 0.0000281 | 0.0036550 | 0.5756103 | 57 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/CFD/… |
| REACTOME_CREATION_OF_C4_AND_C2_ACTIVATORS | 1.943205 | 0.6062028 | 0.0000476 | 0.0049445 | 0.5573322 | 54 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_FCGR_ACTIVATION | 1.899748 | 0.5826955 | 0.0000732 | 0.0060082 | 0.5384341 | 60 | IGHV1-46/IGKV1D-16/IGLV2-23/IGKV3D-20/IGHV4-34/… |
| REACTOME_REGULATION_BY_C_FLIP | -1.904890 | -0.8620732 | 0.0009415 | 0.0432820 | 0.4772708 | 9 | TNFSF10 |
| REACTOME_CASPASE_ACTIVATION_VIA_DEATH_RECEPTORS_IN_THE_PRESENCE_OF_LIGAND | -1.910440 | -0.7941829 | 0.0009439 | 0.0432820 | 0.4772708 | 12 | TNFSF10 |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -1.939184 | -0.5161508 | 0.0001256 | 0.0093251 | 0.5188481 | 99 | RPL30/RPS5/RPL22/RPL35/RPL17/… |
| REACTOME_NONSENSE_MEDIATED_DECAY_NMD | -2.000480 | -0.5211696 | 0.0001032 | 0.0080426 | 0.5384341 | 111 | RPL30/RPS5/RPL22/GSPT2/RPL35/… |
| REACTOME_CELLULAR_RESPONSE_TO_STARVATION | -1.847517 | -0.4701529 | 0.0000642 | 0.0058851 | 0.5384341 | 137 | RPL30/NPRL3/FNIP1/RPS5/RPL22/… |
| REACTOME_INFLUENZA_INFECTION | -1.841868 | -0.4609878 | 0.0000590 | 0.0057483 | 0.5573322 | 150 | RPL30/NUP93/RPS5/NUP98/RPL22/… |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -2.078395 | -0.5672509 | 0.0000340 | 0.0040765 | 0.5573322 | 87 | RPL30/RPS5/RPL22/RPL35/RPL17/… |
| REACTOME_RRNA_PROCESSING | -1.857234 | -0.4566138 | 0.0000124 | 0.0030933 | 0.5933255 | 189 | RPL30/EBNA1BP2/WDR12/RPP40/TRMT10C/… |
| REACTOME_SELENOAMINO_ACID_METABOLISM | -2.010487 | -0.5373524 | 0.0000198 | 0.0030933 | 0.5756103 | 98 | RPL30/RPS5/RPL22/RPL35/RPL17/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -2.025702 | -0.5315205 | 0.0000194 | 0.0030933 | 0.5756103 | 108 | RPL30/RPS5/RPN2/RPL22/RPL35/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM | 1.649233 | 0.3886906 | 0.0000002 | 0.0003546 | 0.6901325 | 485 | IFITM3/HLA-DQB1/SOCS2/PIM1/IFNGR2/… |
| REACTOME_SIGNALING_BY_INTERLEUKINS | 1.604933 | 0.3967749 | 0.0000201 | 0.0156992 | 0.5756103 | 301 | SOCS2/PIM1/FOS/CISH/JUN/… |
| REACTOME_GROWTH_HORMONE_RECEPTOR_SIGNALING | 2.009447 | 0.7962464 | 0.0000495 | 0.0258632 | 0.5573322 | 17 | SOCS2/CISH/SOCS3/SOCS1/STAT1/… |
| REACTOME_INTERLEUKIN_7_SIGNALING | 1.961555 | 0.7252117 | 0.0001129 | 0.0442082 | 0.5384341 | 24 | SOCS2/CISH/H3C2/SOCS1/H3C10/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_MHC_CLASS_II_ANTIGEN_PRESENTATION | 1.974478 | 0.5634924 | 0.0000079 | 0.0122063 | 0.5933255 | 87 | TUBB2A/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/… |
| REACTOME_GPCR_LIGAND_BINDING | 1.912075 | 0.5410320 | 0.0000226 | 0.0122063 | 0.5756103 | 96 | CXCL10/CX3CR1/CXCL9/PTGER2/FPR2/… |
| REACTOME_PD_1_SIGNALING | 2.020237 | 0.7966822 | 0.0001194 | 0.0358340 | 0.5384341 | 16 | HLA-DRB1/HLA-DPA1/HLA-DQB1/HLA-DQA1/HLA-DPB1/… |
| REACTOME_CHEMOKINE_RECEPTORS_BIND_CHEMOKINES | 1.995526 | 0.7619693 | 0.0001373 | 0.0358340 | 0.5188481 | 20 | CXCL10/CX3CR1/CXCL9/CCL3L3/CCL4/… |
| REACTOME_DEPOSITION_OF_NEW_CENPA_CONTAINING_NUCLEOSOMES_AT_THE_CENTROMERE | 1.905490 | 0.6088303 | 0.0002177 | 0.0402353 | 0.5188481 | 45 | CENPX/H2AZ2/H2AC8/H2BC11/MIS18BP1/… |
| REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 1.910266 | 0.6833438 | 0.0006296 | 0.0646308 | 0.4772708 | 26 | HLA-DRB1/HLA-DPA1/HLA-DQB1/HLA-DQA1/HLA-DPB1/… |
| REACTOME_APC_C_MEDIATED_DEGRADATION_OF_CELL_CYCLE_PROTEINS | 1.684110 | 0.4931344 | 0.0006350 | 0.0646308 | 0.4772708 | 76 | PTTG1/UBE2C/UBE2D1/PSMB3/PSMD3/… |
| REACTOME_CHROMOSOME_MAINTENANCE | 1.680580 | 0.4689999 | 0.0005802 | 0.0646308 | 0.4772708 | 106 | CENPX/RFC4/H2AZ2/H2AC8/H2BC11/… |
| REACTOME_PEPTIDE_LIGAND_BINDING_RECEPTORS | 1.809106 | 0.5990433 | 0.0010158 | 0.0757483 | 0.4550599 | 40 | CXCL10/CX3CR1/CXCL9/FPR2/CCL3L3/… |
| REACTOME_CLASS_A_1_RHODOPSIN_LIKE_RECEPTORS | 1.741688 | 0.5429726 | 0.0009274 | 0.0757483 | 0.4772708 | 53 | CXCL10/CX3CR1/CXCL9/CCL3L3/CYSLTR1/… |
| REACTOME_EUKARYOTIC_TRANSLATION_INITIATION | -1.594646 | -0.4515595 | 0.0014797 | 0.0965472 | 0.4550599 | 94 | EIF5/RPL23/RPL18A/RPL29/RPL18/… |
| REACTOME_IL_6_TYPE_CYTOKINE_RECEPTOR_LIGAND_INTERACTIONS | -1.759275 | -0.8961184 | 0.0010663 | 0.0758989 | 0.4550599 | 6 | TYK2 |
| REACTOME_HUR_ELAVL1_BINDS_AND_STABILIZES_MRNA | -1.778908 | -0.8289656 | 0.0010054 | 0.0757483 | 0.4550599 | 8 | TNFSF13/PRKCD |
| REACTOME_INTERLEUKIN_6_FAMILY_SIGNALING | -1.893517 | -0.7834492 | 0.0007116 | 0.0655468 | 0.4772708 | 13 | TYK2/CBL |
| REACTOME_NONSENSE_MEDIATED_DECAY_NMD | -1.626813 | -0.4475012 | 0.0006603 | 0.0646308 | 0.4772708 | 110 | RPL23/RPL18A/RPL29/RPL36AL/FAU/… |
| REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE | -1.684179 | -0.4664993 | 0.0002866 | 0.0408023 | 0.4984931 | 106 | SRP72/RPL23/RPL18A/RPL29/RPL18/… |
| REACTOME_RESPONSE_OF_EIF2AK4_GCN2_TO_AMINO_ACID_DEFICIENCY | -1.736423 | -0.4873728 | 0.0002312 | 0.0402353 | 0.5188481 | 98 | DDIT3/RPL23/RPL18A/RPL29/RPL18/… |
| REACTOME_INTERLEUKIN_35_SIGNALLING | -1.874964 | -0.8737273 | 0.0001602 | 0.0358340 | 0.5188481 | 8 | TYK2 |
| REACTOME_EUKARYOTIC_TRANSLATION_ELONGATION | -1.827911 | -0.5246989 | 0.0000465 | 0.0182012 | 0.5573322 | 86 | EEF1D/RPL23/RPL18A/RPL29/RPL18/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_PD_1_SIGNALING | 2.359489 | 0.9174443 | 0.0000003 | 0.0005014 | 0.6749629 | 12 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/CD4/… |
| REACTOME_ANTIMICROBIAL_PEPTIDES | 2.276290 | 0.8737225 | 0.0000049 | 0.0018192 | 0.6105269 | 13 | S100A8/S100A9/CLU/CD4 |
| REACTOME_TOLL_LIKE_RECEPTOR_TLR1_TLR2_CASCADE | 2.171612 | 0.6254880 | 0.0000041 | 0.0018192 | 0.6105269 | 44 | S100A8/CD14/S100A9/S100A12/MEF2A/… |
| REACTOME_DISEASES_OF_IMMUNE_SYSTEM | 2.237486 | 0.8398287 | 0.0000126 | 0.0037327 | 0.5933255 | 14 | S100A8/CD14/S100A9/NFKB1 |
| REACTOME_IRAK4_DEFICIENCY_TLR2_4 | 2.217217 | 0.8918020 | 0.0000153 | 0.0037688 | 0.5756103 | 11 | S100A8/CD14/S100A9 |
| REACTOME_REGULATION_OF_TLR_BY_ENDOGENOUS_LIGAND | 2.164041 | 0.9115141 | 0.0000281 | 0.0059318 | 0.5756103 | 9 | S100A8/CD14/S100A9/GSDMD |
| REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 2.107443 | 0.7523408 | 0.0002823 | 0.0479045 | 0.4984931 | 17 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/CD4/… |
| REACTOME_TOLL_LIKE_RECEPTOR_CASCADES | 1.835710 | 0.4712314 | 0.0006201 | 0.0834250 | 0.4772708 | 65 | S100A8/CD14/S100A9/S100A12/MEF2A/… |
| REACTOME_TRANSLATION | -1.443926 | -0.4391890 | 0.0004220 | 0.0624586 | 0.4984931 | 236 | RPL22L1/AURKAIP1/EIF3A/YARS1/EIF2S3/… |
| REACTOME_MRNA_SPLICING | -1.533618 | -0.4797735 | 0.0002913 | 0.0479045 | 0.4984931 | 153 | SNRPD1/PUF60/SNRNP40/SNRPG/CASC3/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| REACTOME_FCERI_MEDIATED_MAPK_ACTIVATION | 2.109754 | 0.8243360 | 0.0000097 | 0.0099276 | 0.5933255 | 28 | IGHV1-2/FOS/FCER1G/IGKV3-20/IGKV4-1/… |
| REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL | 1.931184 | 0.6760622 | 0.0000130 | 0.0099276 | 0.5933255 | 70 | IGHV1-2/TRAV8-4/TYROBP/TRBV12-3/KLRG1/… |
| REACTOME_CD22_MEDIATED_BCR_REGULATION | 1.974327 | 0.8717231 | 0.0000394 | 0.0129559 | 0.5573322 | 14 | IGHV1-2/CD79B/CD79A/IGKV3-20/IGHM/… |
| REACTOME_CELL_SURFACE_INTERACTIONS_AT_THE_VASCULAR_WALL | 1.953504 | 0.6885833 | 0.0000310 | 0.0129559 | 0.5573322 | 65 | IGHV1-2/JCHAIN/IGHA1/FCER1G/IGHA2/… |
| REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA | 1.937776 | 0.8842414 | 0.0000474 | 0.0129559 | 0.5573322 | 12 | IGHV1-2/IGHA1/IGHA2/IGKV3-20/IGHV4-59 |
| REACTOME_DNA_DAMAGE_TELOMERE_STRESS_INDUCED_SENESCENCE | 1.859313 | 0.6651506 | 0.0000508 | 0.0129559 | 0.5573322 | 57 | H2BC13/LMNB1/H2BC14/H2BC3/H1-1/… |
| REACTOME_SENESCENCE_ASSOCIATED_SECRETORY_PHENOTYPE_SASP | 1.727124 | 0.5963017 | 0.0001164 | 0.0254469 | 0.5384341 | 82 | FOS/H2BC13/H2BC14/H2BC3/H2BC11/… |
| REACTOME_RUNX1_REGULATES_GENES_INVOLVED_IN_MEGAKARYOCYTE_DIFFERENTIATION_AND_PLATELET_FUNCTION | 1.780213 | 0.6322222 | 0.0002977 | 0.0569338 | 0.4984931 | 61 | H2BC13/H2BC14/H2BC3/PRMT1/H2BC11/… |
| REACTOME_FCGR_ACTIVATION | 2.002534 | 0.8512086 | 0.0004058 | 0.0620838 | 0.4984931 | 17 | IGHV1-2/IGKV3-20/CD247/IGKV4-1/IGHV4-59/… |
| REACTOME_ACTIVATED_PKN1_STIMULATES_TRANSCRIPTION_OF_AR_ANDROGEN_RECEPTOR_REGULATED_GENES_KLK2_AND_KLK3 | 1.829496 | 0.6737836 | 0.0007073 | 0.0983737 | 0.4772708 | 44 | H2BC13/H2BC14/H2BC3/H2BC11/H2AC6/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each cluster, we display a table of the GSEA results. NES refers to the Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild; a negative NES indicates the pathway is enriched in Covid_Mild relative to Covid_Critical.
If a cluster is missing, it indicates that no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 2.171408 | 0.6966576 | 0.0000000 | 0.0000007 | 0.7477397 | 84 | H3C2/H2AX/H2AC14/H2BC11/H2AZ1/… |
| KEGG_CELL_CYCLE | 2.141807 | 0.6693173 | 0.0000000 | 0.0000007 | 0.7614608 | 106 | MCM2/MCM7/PCNA/CDC45/CHEK1/… |
| KEGG_DNA_REPLICATION | 2.209945 | 0.8330369 | 0.0000002 | 0.0000099 | 0.6901325 | 30 | MCM4/MCM2/MCM7/PCNA/FEN1/… |
| KEGG_HEMATOPOIETIC_CELL_LINEAGE | 1.985047 | 0.7252280 | 0.0000903 | 0.0041978 | 0.5384341 | 37 | CD8B/CD3G/IL7R/CD3E/CD3D/… |
| KEGG_ONE_CARBON_POOL_BY_FOLATE | 1.939099 | 0.8656199 | 0.0001199 | 0.0044585 | 0.5384341 | 14 | TYMS/DHFR/SHMT2/SHMT1/MTHFD2/… |
| KEGG_PRIMARY_IMMUNODEFICIENCY | 1.926642 | 0.7595443 | 0.0001469 | 0.0045542 | 0.5188481 | 26 | CD8B/IL7R/CD3E/CD3D/UNG/… |
| KEGG_P53_SIGNALING_PATHWAY | 1.799546 | 0.6276497 | 0.0004796 | 0.0127436 | 0.4984931 | 53 | RRM2/CHEK1/GTSE1/BAX/CCNB2/… |
| KEGG_OOCYTE_MEIOSIS | 1.764657 | 0.5788121 | 0.0006797 | 0.0158039 | 0.4772708 | 76 | PKMYT1/CDC20/PTTG1/YWHAE/SGO1/… |
| KEGG_GLYCINE_SERINE_AND_THREONINE_METABOLISM | 1.713269 | 0.7798135 | 0.0044324 | 0.0716371 | 0.4070179 | 13 | SHMT2/PSAT1/PSPH/SHMT1/PHGDH/… |
| KEGG_GLUTATHIONE_METABOLISM | 1.700636 | 0.6373135 | 0.0046217 | 0.0716371 | 0.4070179 | 32 | GSTM1/GGT7/GSTM2/GSR/RRM1/… |
| KEGG_RIBOSOME | -1.642708 | -0.4668809 | 0.0013922 | 0.0287728 | 0.4550599 | 84 | RPL6/RPL36A/RPL10A/RPL5/RPL36/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_RIBOSOME | -2.058845 | -0.5795824 | 1.04e-05 | 0.0019406 | 0.5933255 | 84 | RPL30/RPS5/RPL22/RPL35/RPL17/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 1.852556 | 0.5516434 | 0.0001120 | 0.0207161 | 0.5384341 | 71 | HLA-DQB1/CD40LG/H3C2/H3C10/H3C11/… |
| KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION | 1.767137 | 0.5097968 | 0.0002292 | 0.0212001 | 0.5188481 | 90 | IFNGR2/CD40LG/TNFRSF18/CCL5/TNFRSF4/… |
| KEGG_LEISHMANIA_INFECTION | 1.777612 | 0.5759608 | 0.0010579 | 0.0652342 | 0.4550599 | 44 | HLA-DQB1/IFNGR2/FOS/NFKBIA/HLA-DQA1/… |
| KEGG_JAK_STAT_SIGNALING_PATHWAY | 1.705611 | 0.5024723 | 0.0015053 | 0.0696205 | 0.4550599 | 75 | SOCS2/PIM1/IFNGR2/CISH/SOCS3/… |
| KEGG_ASTHMA | 1.753141 | 0.7114749 | 0.0026394 | 0.0976577 | 0.4317077 | 16 | HLA-DQB1/CD40LG/HLA-DQA1/IL10/HLA-DRA |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS | 2.467692 | 0.7116934 | 0.0000000 | 0.0000000 | 0.8753251 | 78 | FCGR2B/CD86/C1QA/C1QB/FCGR3A/… |
| KEGG_HEMATOPOIETIC_CELL_LINEAGE | 2.084252 | 0.6643323 | 0.0000079 | 0.0007360 | 0.5933255 | 44 | ITGA4/HLA-DRB1/CD9/CD37/CD4/… |
| KEGG_ASTHMA | 2.031817 | 0.8275269 | 0.0000626 | 0.0029120 | 0.5384341 | 15 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-DQB1/HLA-DQA1/… |
| KEGG_GRAFT_VERSUS_HOST_DISEASE | 1.983153 | 0.7145278 | 0.0001142 | 0.0042481 | 0.5384341 | 25 | CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| KEGG_AUTOIMMUNE_THYROID_DISEASE | 1.938784 | 0.7348842 | 0.0001764 | 0.0054671 | 0.5188481 | 21 | CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION | 1.879187 | 0.5763188 | 0.0002234 | 0.0059352 | 0.5188481 | 51 | HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/HLA-DQB1/… |
| KEGG_TYPE_I_DIABETES_MELLITUS | 1.922818 | 0.6927890 | 0.0002886 | 0.0059826 | 0.4984931 | 25 | CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| KEGG_CELL_ADHESION_MOLECULES_CAMS | 1.839754 | 0.5574219 | 0.0002895 | 0.0059826 | 0.4984931 | 57 | ITGA4/CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/… |
| KEGG_ALLOGRAFT_REJECTION | 1.904478 | 0.6861813 | 0.0003380 | 0.0062861 | 0.4984931 | 25 | CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| KEGG_VIRAL_MYOCARDITIS | 1.889792 | 0.6082394 | 0.0004968 | 0.0084012 | 0.4772708 | 42 | CD86/HLA-DRB1/HLA-DMB/HLA-DPA1/HLA-G/… |
| KEGG_TYPE_II_DIABETES_MELLITUS | -1.621156 | -0.6346345 | 0.0069573 | 0.0862711 | 0.4070179 | 19 | HK1/PIK3CG/PRKCD/MAPK1/HK3/… |
| KEGG_RIBOSOME | -1.811246 | -0.5190738 | 0.0000417 | 0.0025854 | 0.5573322 | 84 | RPL23/RPL18A/RPL29/RPL18/RPL36AL/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION | 2.213485 | 0.7466168 | 0.0000352 | 0.0032235 | 0.5573322 | 21 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_LEISHMANIA_INFECTION | 2.115934 | 0.6404779 | 0.0000277 | 0.0032235 | 0.5756103 | 34 | HLA-DQA1/IFNGR2/FCGR1A/HLA-DRB1/HLA-DPB1/… |
| KEGG_CELL_ADHESION_MOLECULES_CAMS | 1.958053 | 0.5846113 | 0.0003231 | 0.0197077 | 0.4984931 | 36 | HLA-DQA1/VCAN/HLA-DRB1/HLA-DPB1/HLA-DPA1/… |
| KEGG_ASTHMA | 2.001552 | 0.7615573 | 0.0013252 | 0.0441455 | 0.4550599 | 13 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_ALLOGRAFT_REJECTION | 1.993175 | 0.6867902 | 0.0018186 | 0.0441455 | 0.4550599 | 19 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_HEMATOPOIETIC_CELL_LINEAGE | 1.958328 | 0.6422632 | 0.0016543 | 0.0441455 | 0.4550599 | 23 | CD14/HLA-DRB1/CD4/CD37/HLA-DRA/… |
| KEGG_GRAFT_VERSUS_HOST_DISEASE | 1.941524 | 0.6672288 | 0.0013107 | 0.0441455 | 0.4550599 | 20 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_TYPE_I_DIABETES_MELLITUS | 1.901411 | 0.6534435 | 0.0019299 | 0.0441455 | 0.4550599 | 20 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_AUTOIMMUNE_THYROID_DISEASE | 1.949401 | 0.6884058 | 0.0027751 | 0.0564267 | 0.4317077 | 18 | HLA-DQA1/HLA-DRB1/HLA-DPB1/HLA-DPA1/HLA-DQB1/… |
| KEGG_VIBRIO_CHOLERAE_INFECTION | -1.620600 | -0.6272288 | 0.0043339 | 0.0793103 | 0.4070179 | 27 | KDELR1/ATP6V1B2/KCNQ1/PDIA4/ATP6V0D1/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild; a negative NES indicates it is enriched in Covid_Mild relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HSD17B8_TARGET_GENES | 2.577136 | 0.6971699 | 0.0000000 | 0.0000000 | 1.6719967 | 447 | GINS2/TK1/RRM2/CENPN/MKI67/… |
| E2F5_TARGET_GENES | 1.594857 | 0.4167934 | 0.0000000 | 0.0000046 | 0.7337620 | 874 | GINS2/TK1/RRM2/MKI67/MCM2/… |
| PSMB5_TARGET_GENES | 1.840508 | 0.5245759 | 0.0000006 | 0.0000991 | 0.6594444 | 229 | RRM2/STMN1/CKS2/TUBA1B/H2AX/… |
| SETD7_TARGET_GENES | 1.454402 | 0.3819497 | 0.0000877 | 0.0090553 | 0.5384341 | 708 | TYMS/RRM2/STMN1/DHFR/MCM7/… |
| PHF21A_TARGET_GENES | 1.694348 | 0.4944367 | 0.0002203 | 0.0189432 | 0.5188481 | 181 | PTGDS/TK1/RRM2/CDCA7/UHRF1/… |
| LHX3_TARGET_GENES | 1.871275 | 0.7355436 | 0.0004250 | 0.0274101 | 0.4984931 | 25 | RBBP8/TOP2A/H2BC9/H2AC4/F12/… |
| AEBP2_TARGET_GENES | 1.372416 | 0.3590654 | 0.0005077 | 0.0291097 | 0.4772708 | 802 | TYMS/CENPM/MCM4/TK1/CENPN/… |
| POU2AF1_TARGET_GENES | 1.381042 | 0.3645280 | 0.0010371 | 0.0486511 | 0.4550599 | 620 | TYMS/CENPN/CDCA7/IGLV4-69/CDC45/… |
| ZNF563_TARGET_GENES | -1.363622 | -0.3213080 | 0.0016575 | 0.0712735 | 0.4550599 | 353 | WDR55/PIH1D1/DGKZ/NUP188/ZNF559/… |
| SUPT20H_TARGET_GENES | -1.207036 | -0.2654213 | 0.0009148 | 0.0472019 | 0.4772708 | 1078 | MPV17L2/CCDC191/ZFAS1/TOR1AIP1/DENR/… |
| DIDO1_TARGET_GENES | -1.217403 | -0.2632020 | 0.0004196 | 0.0274101 | 0.4984931 | 1306 | ARL1/CCDC191/PMPCB/TOR1AIP1/RPL6/… |
| HOXA1_TARGET_GENES | -1.363433 | -0.3021310 | 0.0000126 | 0.0016197 | 0.5933255 | 864 | RANBP3/SLC9A6/ARL1/ACAT1/CCDC191/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| NKX2_3_TARGET_GENES | -1.437627 | -0.3235113 | 0.0002802 | 0.0481984 | 0.4984931 | 358 | INTS2/TTC33/ACADSB/M6PR/CSKMT/… |
| NFRKB_TARGET_GENES | -1.250958 | -0.2551818 | 0.0000725 | 0.0231272 | 0.5384341 | 1464 | BRD8/MIGA1/RAB7A/AAR2/SRRD/… |
| ELF2_TARGET_GENES | -1.279697 | -0.2621921 | 0.0000896 | 0.0231272 | 0.5384341 | 1169 | TSG101/VPS50/RAB7A/RNF115/AAR2/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| HSD17B8_TARGET_GENES | 1.471109 | 0.3539828 | 0.0004746 | 0.0796607 | 0.4984931 | 341 | BIRC5/MKI67/PTTG1/TROAP/CDKN3/… |
| DIDO1_TARGET_GENES | -1.246076 | -0.2675079 | 0.0010422 | 0.0796607 | 0.4550599 | 1243 | WDR77/POP4/MRPL54/SUPT7L/RLF/… |
| ZNF766_TARGET_GENES | -1.284510 | -0.2850324 | 0.0007870 | 0.0796607 | 0.4772708 | 759 | NEAT1/LUCAT1/PA2G4/LYN/TMEM115/… |
| DLX4_TARGET_GENES | -1.344985 | -0.3022557 | 0.0005310 | 0.0796607 | 0.4772708 | 615 | WDR77/SUPT7L/CSNK1G1/PFKM/SF3B5/… |
| GLI3_TARGET_GENES | -1.345558 | -0.3052829 | 0.0007172 | 0.0796607 | 0.4772708 | 545 | SVIP/RLF/R3HDM2/APRT/SMC4/… |
| RFX7_TARGET_GENES | -1.399458 | -0.3289083 | 0.0010828 | 0.0796607 | 0.4550599 | 363 | TYK2/WDR77/NEMF/USP48/U2AF2/… |
| ELF5_TARGET_GENES | -1.459133 | -0.3441153 | 0.0004178 | 0.0796607 | 0.4984931 | 341 | HNRNPF/SVIP/LYN/UBL7/WDR43/… |
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Critical. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot a summary of the gene sets/pathways that are enriched in genes overexpressed in Covid_Mild. If there are more than 30 significant pathways total, we plot the top 5 per cluster with the lowest adjusted P value.
Here we plot the pathways that are significantly enriched in the differentially expressed genes between Covid_Critical and Covid_Mild for each cluster.
For each condition, we display a table of the GSEA results. NES refers to Normalized Enrichment Score, the main effect size for GSEA. A positive NES indicates the pathway is enriched in Covid_Critical relative to Covid_Mild; a negative NES indicates it is enriched in Covid_Mild relative to Covid_Critical.
If a cluster is missing, no pathways were significantly enriched in that cluster.
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| E2F_Q3 | 2.224835 | 0.6517409 | 0.0000000 | 0.0000000 | 0.8753251 | 165 | MCM4/MCM2/STMN1/CDCA7/MCM7/… |
| E2F1_Q4_01 | 2.167436 | 0.6323449 | 0.0000000 | 0.0000000 | 0.8634154 | 173 | MCM4/MCM2/STMN1/CDCA7/MCM7/… |
| E2F_Q4_01 | 2.170634 | 0.6295761 | 0.0000000 | 0.0000000 | 0.8513391 | 180 | MCM4/MCM2/STMN1/CDCA7/MCM7/… |
| E2F_Q6_01 | 2.110619 | 0.6141850 | 0.0000000 | 0.0000000 | 0.8390889 | 176 | MCM4/RRM2/MCM2/STMN1/CDCA7/… |
| E2F_Q3_01 | 2.102959 | 0.6092334 | 0.0000000 | 0.0000001 | 0.8012156 | 182 | MCM4/MCM2/STMN1/CDCA7/MCM7/… |
| E2F_Q4 | 2.076081 | 0.6000867 | 0.0000000 | 0.0000001 | 0.7881868 | 187 | MCM4/RRM2/MCM2/CLSPN/STMN1/… |
| E2F_03 | 2.052959 | 0.5969960 | 0.0000000 | 0.0000002 | 0.7749390 | 179 | MCM4/MCM2/STMN1/CDCA7/MCM7/… |
| E2F4DP1_01 | 2.006712 | 0.5775346 | 0.0000000 | 0.0000010 | 0.7477397 | 190 | MCM4/RRM2/MCM2/CLSPN/STMN1/… |
| E2F1DP1RB_01 | 2.057916 | 0.6109312 | 0.0000000 | 0.0000021 | 0.7195128 | 145 | RRM2/MCM2/CLSPN/STMN1/CDCA7/… |
| KRCTCNNNNMANAGC_UNKNOWN | 2.163679 | 0.7694695 | 0.0000008 | 0.0000312 | 0.6594444 | 43 | H3C2/UBE2C/H2AC14/H2BC11/H2AC16/… |
| OCT1_03 | -1.484513 | -0.4114424 | 0.0038742 | 0.0908938 | 0.4317077 | 110 | TP53BP1/ARL1/RMI1/AAK1/AMD1/… |
| AAGWWRNYGGC_UNKNOWN | -1.592205 | -0.4453427 | 0.0019950 | 0.0507073 | 0.4317077 | 99 | ARL1/TMEM187/PCIF1/MYLIP/TRIM8/… |
| pathway | NES | ES | pval | padj | log2err | size | leadingEdge |
|---|---|---|---|---|---|---|---|
| KRCTCNNNNMANAGC_UNKNOWN | 2.00142 | 0.6711829 | 5.1e-05 | 0.0311284 | 0.5573322 | 36 | H1-4/H3C2/H1-2/H3C10/H3C11/… |
# Wall-clock duration of the run: the third element of the proc.time()
# difference, converted from seconds to hours and labelled for printing.
hourspassed <- setNames(
  (proc.time() - timestart)[3] / 60 / 60,
  'Hours'
)
hourspassed
## Hours
## 0.5844919
# Report the approximate peak memory used by this run.
# Clear all objects from the workspace before the final garbage collection.
rm(list = ls())
# Spell out TRUE: the T shorthand is an ordinary variable that can be reassigned.
finalmem <- gc(verbose = TRUE, full = TRUE)
# The last column of gc()'s matrix is "max used" in Mb; sum its rows
# (Ncells and Vcells) to get the total.
mb <- sum(finalmem[, ncol(finalmem)])
# Decimal conversion (1000 Mb per Gb), hence "approximately" in the label.
gb <- setNames(mb / 1000, 'Gb used (approximately)')
gb
## Gb used (approximately)
## 3.9599
# beepr::beep()
# Print the R version, platform, locale, and attached/loaded package versions
# for this run.
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux 8.7 (Ootpa)
##
## Matrix products: default
## BLAS/LAPACK: /gs/gsfs0/home/aferrena/packages/miniconda3/miniconda3/envs/scdapp/lib/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] doParallel_1.0.17 iterators_1.0.14 fgsea_1.28.0
## [4] speckle_0.99.7 circlize_0.4.16 Matrix_1.6-5
## [7] ggrepel_0.9.5 ggfittext_0.10.2 ggalluvial_0.12.5
## [10] hdf5r_1.3.9 msigdbr_7.5.1 edgeR_4.0.16
## [13] limma_3.58.1 ggridges_0.5.6 ggdendro_0.2.0
## [16] ComplexHeatmap_2.18.0 glmGamPoi_1.14.3 foreach_1.5.2
## [19] future_1.33.1 DoubletFinder_2.0.4 scDAPP_1.0.0
## [22] Seurat_5.0.2 SeuratObject_5.0.1 sp_2.1-3
## [25] RISC_1.6.0 patchwork_1.2.0 lubridate_1.9.3
## [28] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [31] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [34] tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.3
## [3] later_1.3.2 bitops_1.0-7
## [5] polyclip_1.10-6 fastDummies_1.7.3
## [7] lifecycle_1.0.4 globals_0.16.2
## [9] lattice_0.22-5 MASS_7.3-60
## [11] magrittr_2.0.3 sass_0.4.8
## [13] rmarkdown_2.25 plotly_4.10.4
## [15] jquerylib_0.1.4 yaml_2.3.8
## [17] httpuv_1.6.14 sctransform_0.4.1
## [19] spam_2.10-0 askpass_1.2.0
## [21] spatstat.sparse_3.0-3 reticulate_1.35.0
## [23] cowplot_1.1.3 pbapply_1.7-2
## [25] RColorBrewer_1.1-3 abind_1.4-5
## [27] zlibbioc_1.48.0 Rtsne_0.17
## [29] GenomicRanges_1.54.1 BiocGenerics_0.48.1
## [31] RCurl_1.98-1.14 GenomeInfoDbData_1.2.11
## [33] IRanges_2.36.0 S4Vectors_0.40.2
## [35] irlba_2.3.5.1 listenv_0.9.1
## [37] spatstat.utils_3.0-4 pheatmap_1.0.12
## [39] umap_0.2.10.0 goftest_1.2-3
## [41] RSpectra_0.16-1 spatstat.random_3.2-3
## [43] fitdistrplus_1.1-11 parallelly_1.37.1
## [45] DelayedMatrixStats_1.24.0 leiden_0.4.3.1
## [47] codetools_0.2-19 DelayedArray_0.28.0
## [49] tidyselect_1.2.0 shape_1.4.6.1
## [51] farver_2.1.1 viridis_0.6.5
## [53] matrixStats_1.2.0 stats4_4.3.3
## [55] spatstat.explore_3.2-6 jsonlite_1.8.8
## [57] GetoptLong_1.0.5 ellipsis_0.3.2
## [59] progressr_0.14.0 survival_3.5-8
## [61] Matrix.utils_0.9.8 tools_4.3.3
## [63] ica_1.0-3 Rcpp_1.0.12
## [65] glue_1.7.0 gridExtra_2.3
## [67] SparseArray_1.2.4 mgcv_1.9-1
## [69] xfun_0.42 MatrixGenerics_1.14.0
## [71] GenomeInfoDb_1.38.6 withr_3.0.0
## [73] fastmap_1.1.1 fansi_1.0.6
## [75] openssl_2.1.1 digest_0.6.34
## [77] timechange_0.3.0 R6_2.5.1
## [79] mime_0.12 colorspace_2.1-0
## [81] scattermore_1.2 tensor_1.5
## [83] spatstat.data_3.0-4 utf8_1.2.4
## [85] generics_0.1.3 data.table_1.15.2
## [87] FNN_1.1.4 httr_1.4.7
## [89] htmlwidgets_1.6.4 S4Arrays_1.2.0
## [91] uwot_0.1.16 pkgconfig_2.0.3
## [93] gtable_0.3.4 lmtest_0.9-40
## [95] SingleCellExperiment_1.24.0 XVector_0.42.0
## [97] htmltools_0.5.7 dotCall64_1.1-1
## [99] clue_0.3-65 scales_1.3.0
## [101] Biobase_2.62.0 png_0.1-8
## [103] knitr_1.45 tzdb_0.4.0
## [105] reshape2_1.4.4 rjson_0.2.21
## [107] nlme_3.1-164 cachem_1.0.8
## [109] zoo_1.8-12 GlobalOptions_0.1.2
## [111] KernSmooth_2.23-22 miniUI_0.1.1.1
## [113] pillar_1.9.0 vctrs_0.6.5
## [115] RANN_2.6.1 promises_1.2.1
## [117] xtable_1.8-4 cluster_2.1.6
## [119] evaluate_0.23 locfit_1.5-9.8
## [121] cli_3.6.2 compiler_4.3.3
## [123] rlang_1.1.3 crayon_1.5.2
## [125] grr_0.9.5 future.apply_1.11.1
## [127] labeling_0.4.3 densityClust_0.3.3
## [129] plyr_1.8.9 stringi_1.8.3
## [131] BiocParallel_1.36.0 viridisLite_0.4.2
## [133] deldir_2.0-4 babelgene_22.9
## [135] munsell_0.5.0 lazyeval_0.2.2
## [137] spatstat.geom_3.2-9 RcppHNSW_0.6.0
## [139] hms_1.1.3 sparseMatrixStats_1.14.0
## [141] bit64_4.0.5 statmod_1.5.0
## [143] shiny_1.8.0 highr_0.10
## [145] SummarizedExperiment_1.32.0 ROCR_1.0-11
## [147] igraph_1.6.0 bslib_0.6.1
## [149] fastmatch_1.1-4 bit_4.0.5